[libvirt] [PATCH 0/6] Introduce a new migration protocol to QEMU driver

The current migration protocol has several flaws:

 - No initial hook on the source host to do work before the destination
   VM is launched.
 - No ability to restart the source VM if the destination fails to
   receive all the migration data, even though the source successfully
   sent it all.

This series introduces a new 5-step migration process to address these
limitations. To support features such as seamless migration of SPICE
clients and lock driver state passing, it also makes use of the
migration cookie feature.

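(For readers new to the design, below is a minimal, compilable sketch of
the control flow that a client-driven 5-step handshake with cookie
passing implies. Every name in it, the phase functions, the cookie
strings, the "demo-guest" domain, is an illustrative assumption, not an
API added by this series; it only models the sequence described above.)

/* Hypothetical sketch of a client-orchestrated 5-step migration.
 * Nothing here is a real libvirt entry point; it only models the
 * phase ordering and the opaque cookie hand-off between hosts
 * (e.g. SPICE relocation data or lock manager state).
 * Build: cc -o mig5 mig5.c && ./mig5
 */
#include <stdio.h>

/* 1. Begin (source): first chance to do work on the source host
 *    before any process exists on the destination. */
static const char *phase_begin(const char *dom)
{
    printf("src: begin   %s\n", dom);
    return "cookie:src-state";          /* hypothetical payload */
}

/* 2. Prepare (destination): launch the incoming QEMU and report
 *    back where it is listening. */
static const char *phase_prepare(const char *dom, const char *cookie)
{
    printf("dst: prepare %s (cookie: %s)\n", dom, cookie);
    return "cookie:dst-listen-uri";
}

/* 3. Perform (source): stream RAM and device state across. */
static int phase_perform(const char *dom, const char *cookie)
{
    printf("src: perform %s (cookie: %s)\n", dom, cookie);
    return 0;                           /* 0 == all data sent */
}

/* 4. Finish (destination): start CPUs if the data arrived intact,
 *    otherwise tear the half-built VM down. */
static int phase_finish(const char *dom, int perform_ok)
{
    printf("dst: finish  %s -> %s\n", dom,
           perform_ok ? "starting CPUs" : "tearing down");
    return perform_ok ? 0 : -1;         /* 0 == dst VM now running */
}

/* 5. Confirm (source): only kill the source VM once the destination
 *    is confirmed running; otherwise resume it - the restart ability
 *    the old protocol lacked. */
static void phase_confirm(const char *dom, int finish_ok)
{
    printf("src: confirm %s -> %s\n", dom,
           finish_ok ? "killing source VM" : "resuming source VM");
}

int main(void)
{
    const char *dom = "demo-guest";
    const char *c1 = phase_begin(dom);
    const char *c2 = phase_prepare(dom, c1);
    int perform_ok = phase_perform(dom, c2) == 0;
    int finish_ok  = phase_finish(dom, perform_ok) == 0;
    phase_confirm(dom, finish_ok);
    return 0;
}
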
Move the qemudStartVMDaemon and qemudShutdownVMDaemon methods into a
separate file, renaming them to qemuProcessStart and qemuProcessStop.
All helper methods called by these are also moved & renamed to match.

* src/Makefile.am: Add qemu_process.c/.h
* src/qemu/qemu_command.c: Add qemuDomainAssignPCIAddresses
* src/qemu/qemu_command.h: Add VNC port min/max
* src/qemu/qemu_domain.c, src/qemu/qemu_domain.h: Add domain event
  queue helpers
* src/qemu/qemu_driver.c, src/qemu/qemu_driver.h: Remove all QEMU
  process startup/shutdown functions
* src/qemu/qemu_process.c, src/qemu/qemu_process.h: Add all QEMU
  process startup/shutdown functions
---
 po/POTFILES.in          |    1 +
 src/Makefile.am         |    1 +
 src/qemu/qemu_command.c |   29 +
 src/qemu/qemu_command.h |    5 +
 src/qemu/qemu_domain.c  |   56 +
 src/qemu/qemu_domain.h  |   11 +
 src/qemu/qemu_driver.c  | 3535 ++++++++---------------------------------------
 src/qemu/qemu_driver.h  |   31 +-
 src/qemu/qemu_process.c | 2417 ++++++++++++++++++++++++++++++++
 src/qemu/qemu_process.h |   52 +
 10 files changed, 3121 insertions(+), 3017 deletions(-)
 create mode 100644 src/qemu/qemu_process.c
 create mode 100644 src/qemu/qemu_process.h

diff --git a/po/POTFILES.in b/po/POTFILES.in
index 5f2ed75..343fe5d 100644
--- a/po/POTFILES.in
+++ b/po/POTFILES.in
@@ -61,6 +61,7 @@ src/qemu/qemu_hotplug.c
 src/qemu/qemu_monitor.c
 src/qemu/qemu_monitor_json.c
 src/qemu/qemu_monitor_text.c
+src/qemu/qemu_process.c
 src/remote/remote_driver.c
 src/secret/secret_driver.c
 src/security/security_apparmor.c
diff --git a/src/Makefile.am b/src/Makefile.am
index 2f94efd..15a4e8c 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -281,6 +281,7 @@ QEMU_DRIVER_SOURCES = \
 		qemu/qemu_hostdev.c qemu/qemu_hostdev.h \
 		qemu/qemu_hotplug.c qemu/qemu_hotplug.h \
 		qemu/qemu_conf.c qemu/qemu_conf.h \
+		qemu/qemu_process.c qemu/qemu_process.h \
 		qemu/qemu_monitor.c qemu/qemu_monitor.h \
 		qemu/qemu_monitor_text.c \
 		qemu/qemu_monitor_text.h \
diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c
index f78ce71..c53a706 100644
--- a/src/qemu/qemu_command.c
+++ b/src/qemu/qemu_command.c
@@ -715,6 +715,35 @@ static int qemuCollectPCIAddress(virDomainDefPtr def ATTRIBUTE_UNUSED,
 }
 
+int
+qemuDomainAssignPCIAddresses(virDomainDefPtr def)
+{
+    int ret = -1;
+    unsigned long long qemuCmdFlags = 0;
+    qemuDomainPCIAddressSetPtr addrs = NULL;
+
+    if (qemuCapsExtractVersionInfo(def->emulator,
+                                   NULL,
+                                   &qemuCmdFlags) < 0)
+        goto cleanup;
+
+    if (qemuCmdFlags & QEMUD_CMD_FLAG_DEVICE) {
+        if (!(addrs = qemuDomainPCIAddressSetCreate(def)))
+            goto cleanup;
+
+        if (qemuAssignDevicePCISlots(def, addrs) < 0)
+            goto cleanup;
+    }
+
+    ret = 0;
+
+cleanup:
+    qemuDomainPCIAddressSetFree(addrs);
+
+    return ret;
+}
+
+
 qemuDomainPCIAddressSetPtr qemuDomainPCIAddressSetCreate(virDomainDefPtr def)
 {
     qemuDomainPCIAddressSetPtr addrs;
 
diff --git a/src/qemu/qemu_command.h b/src/qemu/qemu_command.h
index 8135046..52d169a 100644
--- a/src/qemu/qemu_command.h
+++ b/src/qemu/qemu_command.h
@@ -37,6 +37,10 @@
 # define QEMU_VIRTIO_SERIAL_PREFIX "virtio-serial"
 # define QEMU_FSDEV_HOST_PREFIX "fsdev-"
 
+# define QEMU_VNC_PORT_MIN  5900
+# define QEMU_VNC_PORT_MAX  65535
+
+
 virCommandPtr qemuBuildCommandLine(virConnectPtr conn,
                                    struct qemud_driver *driver,
                                    virDomainDefPtr def,
@@ -134,6 +138,7 @@ virDomainDefPtr qemuParseCommandLine(virCapsPtr caps,
 virDomainDefPtr qemuParseCommandLineString(virCapsPtr caps,
                                            const char *args);
 
+int qemuDomainAssignPCIAddresses(virDomainDefPtr def);
 qemuDomainPCIAddressSetPtr qemuDomainPCIAddressSetCreate(virDomainDefPtr def);
 
 int
qemuDomainPCIAddressReserveSlot(qemuDomainPCIAddressSetPtr addrs, int slot); diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c index fa7c8bd..e3163ab 100644 --- a/src/qemu/qemu_domain.c +++ b/src/qemu/qemu_domain.c @@ -29,6 +29,7 @@ #include "logging.h" #include "virterror_internal.h" #include "c-ctype.h" +#include "event.h" #include <sys/time.h> @@ -41,6 +42,61 @@ #define timeval_to_ms(tv) (((tv).tv_sec * 1000ull) + ((tv).tv_usec / 1000)) +static void qemuDomainEventDispatchFunc(virConnectPtr conn, + virDomainEventPtr event, + virConnectDomainEventGenericCallback cb, + void *cbopaque, + void *opaque) +{ + struct qemud_driver *driver = opaque; + + /* Drop the lock whle dispatching, for sake of re-entrancy */ + qemuDriverUnlock(driver); + virDomainEventDispatchDefaultFunc(conn, event, cb, cbopaque, NULL); + qemuDriverLock(driver); +} + +void qemuDomainEventFlush(int timer ATTRIBUTE_UNUSED, void *opaque) +{ + struct qemud_driver *driver = opaque; + virDomainEventQueue tempQueue; + + qemuDriverLock(driver); + + driver->domainEventDispatching = 1; + + /* Copy the queue, so we're reentrant safe */ + tempQueue.count = driver->domainEventQueue->count; + tempQueue.events = driver->domainEventQueue->events; + driver->domainEventQueue->count = 0; + driver->domainEventQueue->events = NULL; + + virEventUpdateTimeout(driver->domainEventTimer, -1); + virDomainEventQueueDispatch(&tempQueue, + driver->domainEventCallbacks, + qemuDomainEventDispatchFunc, + driver); + + /* Purge any deleted callbacks */ + virDomainEventCallbackListPurgeMarked(driver->domainEventCallbacks); + + driver->domainEventDispatching = 0; + qemuDriverUnlock(driver); +} + + +/* driver must be locked before calling */ +void qemuDomainEventQueue(struct qemud_driver *driver, + virDomainEventPtr event) +{ + if (virDomainEventQueuePush(driver->domainEventQueue, + event) < 0) + virDomainEventFree(event); + if (driver->domainEventQueue->count == 1) + virEventUpdateTimeout(driver->domainEventTimer, 0); +} + + static void *qemuDomainObjPrivateAlloc(void) { qemuDomainObjPrivatePtr priv; diff --git a/src/qemu/qemu_domain.h b/src/qemu/qemu_domain.h index f14fb79..4333a78 100644 --- a/src/qemu/qemu_domain.h +++ b/src/qemu/qemu_domain.h @@ -77,6 +77,17 @@ struct _qemuDomainObjPrivate { int persistentAddrs; }; +struct qemuDomainWatchdogEvent +{ + virDomainObjPtr vm; + int action; +}; + +void qemuDomainEventFlush(int timer ATTRIBUTE_UNUSED, void *opaque); + +/* driver must be locked before calling */ +void qemuDomainEventQueue(struct qemud_driver *driver, + virDomainEventPtr event); void qemuDomainSetPrivateDataHooks(virCapsPtr caps); void qemuDomainSetNamespaceHooks(virCapsPtr caps); diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c index 52ea98e..9cc6e89 100644 --- a/src/qemu/qemu_driver.c +++ b/src/qemu/qemu_driver.c @@ -46,9 +46,6 @@ #include <sys/un.h> -#include "virterror_internal.h" -#include "logging.h" -#include "datatypes.h" #include "qemu_driver.h" #include "qemu_conf.h" #include "qemu_capabilities.h" @@ -59,7 +56,11 @@ #include "qemu_monitor.h" #include "qemu_bridge_filter.h" #include "qemu_audit.h" -#include "c-ctype.h" +#include "qemu_process.h" + +#include "virterror_internal.h" +#include "logging.h" +#include "datatypes.h" #include "event.h" #include "buf.h" #include "util.h" @@ -88,169 +89,48 @@ #define VIR_FROM_THIS VIR_FROM_QEMU -#define QEMU_VNC_PORT_MIN 5900 -#define QEMU_VNC_PORT_MAX 65535 - #define QEMU_NB_MEM_PARAM 3 +#if HAVE_LINUX_KVM_H +# include <linux/kvm.h> +#endif -#define timeval_to_ms(tv) 
(((tv).tv_sec * 1000ull) + ((tv).tv_usec / 1000)) +/* device for kvm ioctls */ +#define KVM_DEVICE "/dev/kvm" -struct watchdogEvent -{ - virDomainObjPtr vm; - int action; -}; +/* add definitions missing in older linux/kvm.h */ +#ifndef KVMIO +# define KVMIO 0xAE +#endif +#ifndef KVM_CHECK_EXTENSION +# define KVM_CHECK_EXTENSION _IO(KVMIO, 0x03) +#endif +#ifndef KVM_CAP_NR_VCPUS +# define KVM_CAP_NR_VCPUS 9 /* returns max vcpus per vm */ +#endif + + +#define timeval_to_ms(tv) (((tv).tv_sec * 1000ull) + ((tv).tv_usec / 1000)) static void processWatchdogEvent(void *data, void *opaque); static int qemudShutdown(void); -static void qemuDomainEventFlush(int timer, void *opaque); -static void qemuDomainEventQueue(struct qemud_driver *driver, - virDomainEventPtr event); - static int qemudDomainObjStart(virConnectPtr conn, struct qemud_driver *driver, virDomainObjPtr vm, bool start_paused); -static int qemudStartVMDaemon(virConnectPtr conn, - struct qemud_driver *driver, - virDomainObjPtr vm, - const char *migrateFrom, - bool start_paused, - int stdin_fd, - const char *stdin_path, - enum virVMOperationType vmop); - -static void qemudShutdownVMDaemon(struct qemud_driver *driver, - virDomainObjPtr vm, - int migrated); - static int qemudDomainGetMaxVcpus(virDomainPtr dom); -static int qemuDetectVcpuPIDs(struct qemud_driver *driver, - virDomainObjPtr vm); - -static int qemudVMFiltersInstantiate(virConnectPtr conn, - virDomainDefPtr def); - -static struct qemud_driver *qemu_driver = NULL; - - -static int doStartCPUs(struct qemud_driver *driver, virDomainObjPtr vm, virConnectPtr conn) -{ - int ret; - qemuDomainObjPrivatePtr priv = vm->privateData; - - qemuDomainObjEnterMonitorWithDriver(driver, vm); - ret = qemuMonitorStartCPUs(priv->mon, conn); - if (ret == 0) { - vm->state = VIR_DOMAIN_RUNNING; - } - qemuDomainObjExitMonitorWithDriver(driver, vm); - - return ret; -} - -static int doStopCPUs(struct qemud_driver *driver, virDomainObjPtr vm) -{ - int ret; - int oldState = vm->state; - qemuDomainObjPrivatePtr priv = vm->privateData; - - vm->state = VIR_DOMAIN_PAUSED; - qemuDomainObjEnterMonitorWithDriver(driver, vm); - ret = qemuMonitorStopCPUs(priv->mon); - qemuDomainObjExitMonitorWithDriver(driver, vm); - if (ret < 0) { - vm->state = oldState; - } - return ret; -} - - -static int -qemudLogFD(struct qemud_driver *driver, const char* name, bool append) -{ - char *logfile; - mode_t logmode; - int fd = -1; - - if (virAsprintf(&logfile, "%s/%s.log", driver->logDir, name) < 0) { - virReportOOMError(); - return -1; - } - - logmode = O_CREAT | O_WRONLY; - /* Only logrotate files in /var/log, so only append if running privileged */ - if (driver->privileged || append) - logmode |= O_APPEND; - else - logmode |= O_TRUNC; - - if ((fd = open(logfile, logmode, S_IRUSR | S_IWUSR)) < 0) { - virReportSystemError(errno, - _("failed to create logfile %s"), - logfile); - VIR_FREE(logfile); - return -1; - } - VIR_FREE(logfile); - if (virSetCloseExec(fd) < 0) { - virReportSystemError(errno, "%s", - _("Unable to set VM logfile close-on-exec flag")); - VIR_FORCE_CLOSE(fd); - return -1; - } - return fd; -} - - -static int -qemudLogReadFD(const char* logDir, const char* name, off_t pos) -{ - char *logfile; - mode_t logmode = O_RDONLY; - int fd = -1; - - if (virAsprintf(&logfile, "%s/%s.log", logDir, name) < 0) { - qemuReportError(VIR_ERR_INTERNAL_ERROR, - _("failed to build logfile name %s/%s.log"), - logDir, name); - return -1; - } - - if ((fd = open(logfile, logmode)) < 0) { - virReportSystemError(errno, - _("failed to create 
logfile %s"), - logfile); - VIR_FREE(logfile); - return -1; - } - if (virSetCloseExec(fd) < 0) { - virReportSystemError(errno, "%s", - _("Unable to set VM logfile close-on-exec flag")); - VIR_FORCE_CLOSE(fd); - VIR_FREE(logfile); - return -1; - } - if (pos < 0 || lseek(fd, pos, SEEK_SET) < 0) { - virReportSystemError(pos < 0 ? 0 : errno, - _("Unable to seek to %lld in %s"), - (long long) pos, logfile); - VIR_FORCE_CLOSE(fd); - } - VIR_FREE(logfile); - return fd; -} +struct qemud_driver *qemu_driver = NULL; struct qemuAutostartData { struct qemud_driver *driver; virConnectPtr conn; }; + static void qemuAutostartDomain(void *payload, const char *name ATTRIBUTE_UNUSED, void *opaque) { @@ -283,8 +163,10 @@ qemuAutostartDomain(void *payload, const char *name ATTRIBUTE_UNUSED, void *opaq virDomainObjUnlock(vm); } + static void -qemudAutostartConfigs(struct qemud_driver *driver) { +qemuAutostartDomains(struct qemud_driver *driver) +{ /* XXX: Figure out a better way todo this. The domain * startup code needs a connection handle in order * to lookup the bridge associated with a virtual @@ -304,2852 +186,605 @@ qemudAutostartConfigs(struct qemud_driver *driver) { virConnectClose(conn); } - -/** - * qemudRemoveDomainStatus - * - * remove all state files of a domain from statedir - * - * Returns 0 on success - */ static int -qemudRemoveDomainStatus(struct qemud_driver *driver, - virDomainObjPtr vm) +qemuSecurityInit(struct qemud_driver *driver) { - char ebuf[1024]; - char *file = NULL; - - if (virAsprintf(&file, "%s/%s.xml", driver->stateDir, vm->def->name) < 0) { - virReportOOMError(); - return(-1); - } - - if (unlink(file) < 0 && errno != ENOENT && errno != ENOTDIR) - VIR_WARN("Failed to remove domain XML for %s: %s", - vm->def->name, virStrerror(errno, ebuf, sizeof(ebuf))); - VIR_FREE(file); + virSecurityManagerPtr mgr = virSecurityManagerNew(driver->securityDriverName, + driver->allowDiskFormatProbing); + if (!mgr) + goto error; - if (virFileDeletePid(driver->stateDir, vm->def->name) != 0) - VIR_WARN("Failed to remove PID file for %s: %s", - vm->def->name, virStrerror(errno, ebuf, sizeof(ebuf))); + if (driver->privileged) { + virSecurityManagerPtr dac = virSecurityManagerNewDAC(driver->user, + driver->group, + driver->allowDiskFormatProbing, + driver->dynamicOwnership); + if (!dac) + goto error; + if (!(driver->securityManager = virSecurityManagerNewStack(mgr, + dac))) + goto error; + } else { + driver->securityManager = mgr; + } return 0; -} - -/* - * This is a callback registered with a qemuMonitorPtr instance, - * and to be invoked when the monitor console hits an end of file - * condition, or error, thus indicating VM shutdown should be - * performed - */ -static void -qemuHandleMonitorEOF(qemuMonitorPtr mon ATTRIBUTE_UNUSED, - virDomainObjPtr vm, - int hasError) { - struct qemud_driver *driver = qemu_driver; - virDomainEventPtr event = NULL; - qemuDomainObjPrivatePtr priv; +error: + VIR_ERROR0(_("Failed to initialize security drivers")); + virSecurityManagerFree(mgr); + return -1; +} - VIR_DEBUG("Received EOF on %p '%s'", vm, vm->def->name); - virDomainObjLock(vm); +static virCapsPtr +qemuCreateCapabilities(virCapsPtr oldcaps, + struct qemud_driver *driver) +{ + virCapsPtr caps; - if (!virDomainObjIsActive(vm)) { - VIR_DEBUG("Domain %p is not active, ignoring EOF", vm); - virDomainObjUnlock(vm); - return; + /* Basic host arch / guest machine capabilities */ + if (!(caps = qemuCapsInit(oldcaps))) { + virReportOOMError(); + return NULL; } - priv = vm->privateData; - if (!hasError && 
priv->monJSON && !priv->gotShutdown) { - VIR_DEBUG("Monitor connection to '%s' closed without SHUTDOWN event; " - "assuming the domain crashed", vm->def->name); - hasError = 1; + if (driver->allowDiskFormatProbing) { + caps->defaultDiskDriverName = NULL; + caps->defaultDiskDriverType = NULL; + } else { + caps->defaultDiskDriverName = "qemu"; + caps->defaultDiskDriverType = "raw"; } - event = virDomainEventNewFromObj(vm, - VIR_DOMAIN_EVENT_STOPPED, - hasError ? - VIR_DOMAIN_EVENT_STOPPED_FAILED : - VIR_DOMAIN_EVENT_STOPPED_SHUTDOWN); - - qemudShutdownVMDaemon(driver, vm, 0); - qemuDomainStopAudit(vm, hasError ? "failed" : "shutdown"); - - if (!vm->persistent) - virDomainRemoveInactive(&driver->domains, vm); - else - virDomainObjUnlock(vm); + qemuDomainSetPrivateDataHooks(caps); + qemuDomainSetNamespaceHooks(caps); - if (event) { - qemuDriverLock(driver); - qemuDomainEventQueue(driver, event); - qemuDriverUnlock(driver); + if (virGetHostUUID(caps->host.host_uuid)) { + qemuReportError(VIR_ERR_INTERNAL_ERROR, + "%s", _("cannot get the host uuid")); + goto err_exit; } -} - - -static virDomainDiskDefPtr -findDomainDiskByPath(virDomainObjPtr vm, - const char *path) -{ - int i; - for (i = 0; i < vm->def->ndisks; i++) { - virDomainDiskDefPtr disk; + /* Security driver data */ + const char *doi, *model; - disk = vm->def->disks[i]; - if (disk->src != NULL && STREQ(disk->src, path)) - return disk; + doi = virSecurityManagerGetDOI(driver->securityManager); + model = virSecurityManagerGetModel(driver->securityManager); + if (STRNEQ(model, "none")) { + if (!(caps->host.secModel.model = strdup(model))) + goto no_memory; + if (!(caps->host.secModel.doi = strdup(doi))) + goto no_memory; } - qemuReportError(VIR_ERR_INTERNAL_ERROR, - _("no disk found with path %s"), - path); - return NULL; -} - -static virDomainDiskDefPtr -findDomainDiskByAlias(virDomainObjPtr vm, - const char *alias) -{ - int i; - - if (STRPREFIX(alias, QEMU_DRIVE_HOST_PREFIX)) - alias += strlen(QEMU_DRIVE_HOST_PREFIX); - - for (i = 0; i < vm->def->ndisks; i++) { - virDomainDiskDefPtr disk; + VIR_DEBUG("Initialized caps for security driver \"%s\" with " + "DOI \"%s\"", model, doi); - disk = vm->def->disks[i]; - if (disk->info.alias != NULL && STREQ(disk->info.alias, alias)) - return disk; - } + return caps; - qemuReportError(VIR_ERR_INTERNAL_ERROR, - _("no disk found with alias %s"), - alias); +no_memory: + virReportOOMError(); +err_exit: + virCapabilitiesFree(caps); return NULL; } -static int -getVolumeQcowPassphrase(virConnectPtr conn, - virDomainDiskDefPtr disk, - char **secretRet, - size_t *secretLen) +static void qemuDomainSnapshotLoad(void *payload, + const char *name ATTRIBUTE_UNUSED, + void *data) { - virSecretPtr secret; - char *passphrase; - unsigned char *data; - size_t size; - int ret = -1; - virStorageEncryptionPtr enc; - - if (!disk->encryption) { - qemuReportError(VIR_ERR_INTERNAL_ERROR, - _("disk %s does not have any encryption information"), - disk->src); - return -1; - } - enc = disk->encryption; - - if (!conn) { - qemuReportError(VIR_ERR_INTERNAL_ERROR, - "%s", _("cannot find secrets without a connection")); - goto cleanup; - } - - if (conn->secretDriver == NULL || - conn->secretDriver->lookupByUUID == NULL || - conn->secretDriver->getValue == NULL) { - qemuReportError(VIR_ERR_NO_SUPPORT, "%s", - _("secret storage not supported")); - goto cleanup; - } + virDomainObjPtr vm = (virDomainObjPtr)payload; + char *baseDir = (char *)data; + char *snapDir = NULL; + DIR *dir = NULL; + struct dirent *entry; + char *xmlStr; + int ret; 
+ char *fullpath; + virDomainSnapshotDefPtr def = NULL; + char ebuf[1024]; - if (enc->format != VIR_STORAGE_ENCRYPTION_FORMAT_QCOW || - enc->nsecrets != 1 || - enc->secrets[0]->type != - VIR_STORAGE_ENCRYPTION_SECRET_TYPE_PASSPHRASE) { - qemuReportError(VIR_ERR_XML_ERROR, - _("invalid <encryption> for volume %s"), disk->src); + virDomainObjLock(vm); + if (virAsprintf(&snapDir, "%s/%s", baseDir, vm->def->name) < 0) { + VIR_ERROR(_("Failed to allocate memory for snapshot directory for domain %s"), + vm->def->name); goto cleanup; } - secret = conn->secretDriver->lookupByUUID(conn, - enc->secrets[0]->uuid); - if (secret == NULL) - goto cleanup; - data = conn->secretDriver->getValue(secret, &size, - VIR_SECRET_GET_VALUE_INTERNAL_CALL); - virUnrefSecret(secret); - if (data == NULL) - goto cleanup; - - if (memchr(data, '\0', size) != NULL) { - memset(data, 0, size); - VIR_FREE(data); - qemuReportError(VIR_ERR_XML_ERROR, - _("format='qcow' passphrase for %s must not contain a " - "'\\0'"), disk->src); - goto cleanup; - } + VIR_INFO("Scanning for snapshots for domain %s in %s", vm->def->name, + snapDir); - if (VIR_ALLOC_N(passphrase, size + 1) < 0) { - memset(data, 0, size); - VIR_FREE(data); - virReportOOMError(); + if (!(dir = opendir(snapDir))) { + if (errno != ENOENT) + VIR_ERROR(_("Failed to open snapshot directory %s for domain %s: %s"), + snapDir, vm->def->name, + virStrerror(errno, ebuf, sizeof(ebuf))); goto cleanup; } - memcpy(passphrase, data, size); - passphrase[size] = '\0'; - memset(data, 0, size); - VIR_FREE(data); + while ((entry = readdir(dir))) { + if (entry->d_name[0] == '.') + continue; - *secretRet = passphrase; - *secretLen = size; + /* NB: ignoring errors, so one malformed config doesn't + kill the whole process */ + VIR_INFO("Loading snapshot file '%s'", entry->d_name); - ret = 0; - -cleanup: - return ret; -} - -static int -findVolumeQcowPassphrase(qemuMonitorPtr mon ATTRIBUTE_UNUSED, - virConnectPtr conn, - virDomainObjPtr vm, - const char *path, - char **secretRet, - size_t *secretLen) -{ - virDomainDiskDefPtr disk; - int ret = -1; - - virDomainObjLock(vm); - disk = findDomainDiskByPath(vm, path); - - if (!disk) - goto cleanup; - - ret = getVolumeQcowPassphrase(conn, disk, secretRet, secretLen); - -cleanup: - virDomainObjUnlock(vm); - return ret; -} + if (virAsprintf(&fullpath, "%s/%s", snapDir, entry->d_name) < 0) { + VIR_ERROR0(_("Failed to allocate memory for path")); + continue; + } + ret = virFileReadAll(fullpath, 1024*1024*1, &xmlStr); + if (ret < 0) { + /* Nothing we can do here, skip this one */ + VIR_ERROR(_("Failed to read snapshot file %s: %s"), fullpath, + virStrerror(errno, ebuf, sizeof(ebuf))); + VIR_FREE(fullpath); + continue; + } -static int -qemuHandleDomainReset(qemuMonitorPtr mon ATTRIBUTE_UNUSED, - virDomainObjPtr vm) -{ - struct qemud_driver *driver = qemu_driver; - virDomainEventPtr event; + def = virDomainSnapshotDefParseString(xmlStr, 0); + if (def == NULL) { + /* Nothing we can do here, skip this one */ + VIR_ERROR(_("Failed to parse snapshot XML from file '%s'"), fullpath); + VIR_FREE(fullpath); + VIR_FREE(xmlStr); + continue; + } - virDomainObjLock(vm); - event = virDomainEventRebootNewFromObj(vm); - virDomainObjUnlock(vm); + virDomainSnapshotAssignDef(&vm->snapshots, def); - if (event) { - qemuDriverLock(driver); - qemuDomainEventQueue(driver, event); - qemuDriverUnlock(driver); + VIR_FREE(fullpath); + VIR_FREE(xmlStr); } - return 0; -} + /* FIXME: qemu keeps internal track of snapshots. 
We can get access + * to this info via the "info snapshots" monitor command for running + * domains, or via "qemu-img snapshot -l" for shutoff domains. It would + * be nice to update our internal state based on that, but there is a + * a problem. qemu doesn't track all of the same metadata that we do. + * In particular we wouldn't be able to fill in the <parent>, which is + * pretty important in our metadata. + */ + virResetLastError(); -static int -qemuHandleDomainShutdown(qemuMonitorPtr mon ATTRIBUTE_UNUSED, - virDomainObjPtr vm) -{ - virDomainObjLock(vm); - ((qemuDomainObjPrivatePtr) vm->privateData)->gotShutdown = true; +cleanup: + if (dir) + closedir(dir); + VIR_FREE(snapDir); virDomainObjUnlock(vm); - - return 0; } - +/** + * qemudStartup: + * + * Initialization function for the QEmu daemon + */ static int -qemuHandleDomainStop(qemuMonitorPtr mon ATTRIBUTE_UNUSED, - virDomainObjPtr vm) -{ - struct qemud_driver *driver = qemu_driver; - virDomainEventPtr event = NULL; - - virDomainObjLock(vm); - if (vm->state == VIR_DOMAIN_RUNNING) { - VIR_DEBUG("Transitioned guest %s to paused state due to unknown event", vm->def->name); - - vm->state = VIR_DOMAIN_PAUSED; - event = virDomainEventNewFromObj(vm, - VIR_DOMAIN_EVENT_SUSPENDED, - VIR_DOMAIN_EVENT_SUSPENDED_PAUSED); +qemudStartup(int privileged) { + char *base = NULL; + char *driverConf = NULL; + int rc; + virConnectPtr conn = NULL; - if (virDomainSaveStatus(driver->caps, driver->stateDir, vm) < 0) - VIR_WARN("Unable to save status on vm %s after IO error", vm->def->name); - } - virDomainObjUnlock(vm); + if (VIR_ALLOC(qemu_driver) < 0) + return -1; - if (event) { - qemuDriverLock(driver); - if (event) - qemuDomainEventQueue(driver, event); - qemuDriverUnlock(driver); + if (virMutexInit(&qemu_driver->lock) < 0) { + VIR_ERROR0(_("cannot initialize mutex")); + VIR_FREE(qemu_driver); + return -1; } + qemuDriverLock(qemu_driver); + qemu_driver->privileged = privileged; - return 0; -} - + /* Don't have a dom0 so start from 1 */ + qemu_driver->nextvmid = 1; -static int -qemuHandleDomainRTCChange(qemuMonitorPtr mon ATTRIBUTE_UNUSED, - virDomainObjPtr vm, - long long offset) -{ - struct qemud_driver *driver = qemu_driver; - virDomainEventPtr event; + if (virDomainObjListInit(&qemu_driver->domains) < 0) + goto out_of_memory; - virDomainObjLock(vm); - event = virDomainEventRTCChangeNewFromObj(vm, offset); + /* Init callback list */ + if (VIR_ALLOC(qemu_driver->domainEventCallbacks) < 0) + goto out_of_memory; + if (!(qemu_driver->domainEventQueue = virDomainEventQueueNew())) + goto out_of_memory; - if (vm->def->clock.offset == VIR_DOMAIN_CLOCK_OFFSET_VARIABLE) - vm->def->clock.data.adjustment = offset; + if ((qemu_driver->domainEventTimer = + virEventAddTimeout(-1, qemuDomainEventFlush, qemu_driver, NULL)) < 0) + goto error; - if (virDomainSaveStatus(driver->caps, driver->stateDir, vm) < 0) - VIR_WARN0("unable to save domain status with RTC change"); + /* Allocate bitmap for vnc port reservation */ + if ((qemu_driver->reservedVNCPorts = + virBitmapAlloc(QEMU_VNC_PORT_MAX - QEMU_VNC_PORT_MIN)) == NULL) + goto out_of_memory; - virDomainObjUnlock(vm); + /* read the host sysinfo */ + if (privileged) + qemu_driver->hostsysinfo = virSysinfoRead(); - if (event) { - qemuDriverLock(driver); - qemuDomainEventQueue(driver, event); - qemuDriverUnlock(driver); - } + if (privileged) { + if (virAsprintf(&qemu_driver->logDir, + "%s/log/libvirt/qemu", LOCALSTATEDIR) == -1) + goto out_of_memory; - return 0; -} + if ((base = strdup (SYSCONFDIR "/libvirt")) == NULL) + 
goto out_of_memory; + if (virAsprintf(&qemu_driver->stateDir, + "%s/run/libvirt/qemu", LOCALSTATEDIR) == -1) + goto out_of_memory; -static int -qemuHandleDomainWatchdog(qemuMonitorPtr mon ATTRIBUTE_UNUSED, - virDomainObjPtr vm, - int action) -{ - struct qemud_driver *driver = qemu_driver; - virDomainEventPtr watchdogEvent = NULL; - virDomainEventPtr lifecycleEvent = NULL; + if (virAsprintf(&qemu_driver->libDir, + "%s/lib/libvirt/qemu", LOCALSTATEDIR) == -1) + goto out_of_memory; - virDomainObjLock(vm); - watchdogEvent = virDomainEventWatchdogNewFromObj(vm, action); + if (virAsprintf(&qemu_driver->cacheDir, + "%s/cache/libvirt/qemu", LOCALSTATEDIR) == -1) + goto out_of_memory; + if (virAsprintf(&qemu_driver->saveDir, + "%s/lib/libvirt/qemu/save", LOCALSTATEDIR) == -1) + goto out_of_memory; + if (virAsprintf(&qemu_driver->snapshotDir, + "%s/lib/libvirt/qemu/snapshot", LOCALSTATEDIR) == -1) + goto out_of_memory; + if (virAsprintf(&qemu_driver->autoDumpPath, + "%s/lib/libvirt/qemu/dump", LOCALSTATEDIR) == -1) + goto out_of_memory; + } else { + uid_t uid = geteuid(); + char *userdir = virGetUserDirectory(uid); + if (!userdir) + goto error; - if (action == VIR_DOMAIN_EVENT_WATCHDOG_PAUSE && - vm->state == VIR_DOMAIN_RUNNING) { - VIR_DEBUG("Transitioned guest %s to paused state due to watchdog", vm->def->name); + if (virAsprintf(&qemu_driver->logDir, + "%s/.libvirt/qemu/log", userdir) == -1) { + VIR_FREE(userdir); + goto out_of_memory; + } - vm->state = VIR_DOMAIN_PAUSED; - lifecycleEvent = virDomainEventNewFromObj(vm, - VIR_DOMAIN_EVENT_SUSPENDED, - VIR_DOMAIN_EVENT_SUSPENDED_WATCHDOG); + if (virAsprintf(&base, "%s/.libvirt", userdir) == -1) { + VIR_FREE(userdir); + goto out_of_memory; + } + VIR_FREE(userdir); - if (virDomainSaveStatus(driver->caps, driver->stateDir, vm) < 0) - VIR_WARN("Unable to save status on vm %s after IO error", vm->def->name); + if (virAsprintf(&qemu_driver->stateDir, "%s/qemu/run", base) == -1) + goto out_of_memory; + if (virAsprintf(&qemu_driver->libDir, "%s/qemu/lib", base) == -1) + goto out_of_memory; + if (virAsprintf(&qemu_driver->cacheDir, "%s/qemu/cache", base) == -1) + goto out_of_memory; + if (virAsprintf(&qemu_driver->saveDir, "%s/qemu/save", base) == -1) + goto out_of_memory; + if (virAsprintf(&qemu_driver->snapshotDir, "%s/qemu/snapshot", base) == -1) + goto out_of_memory; + if (virAsprintf(&qemu_driver->autoDumpPath, "%s/qemu/dump", base) == -1) + goto out_of_memory; } - if (vm->def->watchdog->action == VIR_DOMAIN_WATCHDOG_ACTION_DUMP) { - struct watchdogEvent *wdEvent; - if (VIR_ALLOC(wdEvent) == 0) { - wdEvent->action = VIR_DOMAIN_WATCHDOG_ACTION_DUMP; - wdEvent->vm = vm; - ignore_value(virThreadPoolSendJob(driver->workerPool, wdEvent)); - } else - virReportOOMError(); + if (virFileMakePath(qemu_driver->stateDir) != 0) { + char ebuf[1024]; + VIR_ERROR(_("Failed to create state dir '%s': %s"), + qemu_driver->stateDir, virStrerror(errno, ebuf, sizeof ebuf)); + goto error; } - - virDomainObjUnlock(vm); - - if (watchdogEvent || lifecycleEvent) { - qemuDriverLock(driver); - if (watchdogEvent) - qemuDomainEventQueue(driver, watchdogEvent); - if (lifecycleEvent) - qemuDomainEventQueue(driver, lifecycleEvent); - qemuDriverUnlock(driver); + if (virFileMakePath(qemu_driver->libDir) != 0) { + char ebuf[1024]; + VIR_ERROR(_("Failed to create lib dir '%s': %s"), + qemu_driver->libDir, virStrerror(errno, ebuf, sizeof ebuf)); + goto error; + } + if (virFileMakePath(qemu_driver->cacheDir) != 0) { + char ebuf[1024]; + VIR_ERROR(_("Failed to create cache dir '%s': %s"), + 
qemu_driver->cacheDir, virStrerror(errno, ebuf, sizeof ebuf)); + goto error; + } + if (virFileMakePath(qemu_driver->saveDir) != 0) { + char ebuf[1024]; + VIR_ERROR(_("Failed to create save dir '%s': %s"), + qemu_driver->saveDir, virStrerror(errno, ebuf, sizeof ebuf)); + goto error; + } + if (virFileMakePath(qemu_driver->snapshotDir) != 0) { + char ebuf[1024]; + VIR_ERROR(_("Failed to create save dir '%s': %s"), + qemu_driver->snapshotDir, virStrerror(errno, ebuf, sizeof ebuf)); + goto error; + } + if (virFileMakePath(qemu_driver->autoDumpPath) != 0) { + char ebuf[1024]; + VIR_ERROR(_("Failed to create dump dir '%s': %s"), + qemu_driver->autoDumpPath, virStrerror(errno, ebuf, sizeof ebuf)); + goto error; } - return 0; -} - + /* Configuration paths are either ~/.libvirt/qemu/... (session) or + * /etc/libvirt/qemu/... (system). + */ + if (virAsprintf(&driverConf, "%s/qemu.conf", base) < 0 || + virAsprintf(&qemu_driver->configDir, "%s/qemu", base) < 0 || + virAsprintf(&qemu_driver->autostartDir, "%s/qemu/autostart", base) < 0) + goto out_of_memory; -static int -qemuHandleDomainIOError(qemuMonitorPtr mon ATTRIBUTE_UNUSED, - virDomainObjPtr vm, - const char *diskAlias, - int action, - const char *reason) -{ - struct qemud_driver *driver = qemu_driver; - virDomainEventPtr ioErrorEvent = NULL; - virDomainEventPtr ioErrorEvent2 = NULL; - virDomainEventPtr lifecycleEvent = NULL; - const char *srcPath; - const char *devAlias; - virDomainDiskDefPtr disk; + VIR_FREE(base); - virDomainObjLock(vm); - disk = findDomainDiskByAlias(vm, diskAlias); + rc = virCgroupForDriver("qemu", &qemu_driver->cgroup, privileged, 1); + if (rc < 0) { + char buf[1024]; + VIR_WARN("Unable to create cgroup for driver: %s", + virStrerror(-rc, buf, sizeof(buf))); + } - if (disk) { - srcPath = disk->src; - devAlias = disk->info.alias; - } else { - srcPath = ""; - devAlias = ""; + if (qemudLoadDriverConfig(qemu_driver, driverConf) < 0) { + goto error; } + VIR_FREE(driverConf); - ioErrorEvent = virDomainEventIOErrorNewFromObj(vm, srcPath, devAlias, action); - ioErrorEvent2 = virDomainEventIOErrorReasonNewFromObj(vm, srcPath, devAlias, action, reason); + if (qemuSecurityInit(qemu_driver) < 0) + goto error; - if (action == VIR_DOMAIN_EVENT_IO_ERROR_PAUSE && - vm->state == VIR_DOMAIN_RUNNING) { - VIR_DEBUG("Transitioned guest %s to paused state due to IO error", vm->def->name); + if ((qemu_driver->caps = qemuCreateCapabilities(NULL, + qemu_driver)) == NULL) + goto error; - vm->state = VIR_DOMAIN_PAUSED; - lifecycleEvent = virDomainEventNewFromObj(vm, - VIR_DOMAIN_EVENT_SUSPENDED, - VIR_DOMAIN_EVENT_SUSPENDED_IOERROR); - - if (virDomainSaveStatus(driver->caps, driver->stateDir, vm) < 0) - VIR_WARN("Unable to save status on vm %s after IO error", vm->def->name); - } - virDomainObjUnlock(vm); - - if (ioErrorEvent || ioErrorEvent2 || lifecycleEvent) { - qemuDriverLock(driver); - if (ioErrorEvent) - qemuDomainEventQueue(driver, ioErrorEvent); - if (ioErrorEvent2) - qemuDomainEventQueue(driver, ioErrorEvent2); - if (lifecycleEvent) - qemuDomainEventQueue(driver, lifecycleEvent); - qemuDriverUnlock(driver); - } - - return 0; -} - - -static int -qemuHandleDomainGraphics(qemuMonitorPtr mon ATTRIBUTE_UNUSED, - virDomainObjPtr vm, - int phase, - int localFamily, - const char *localNode, - const char *localService, - int remoteFamily, - const char *remoteNode, - const char *remoteService, - const char *authScheme, - const char *x509dname, - const char *saslUsername) -{ - struct qemud_driver *driver = qemu_driver; - virDomainEventPtr event; - 
virDomainEventGraphicsAddressPtr localAddr = NULL; - virDomainEventGraphicsAddressPtr remoteAddr = NULL; - virDomainEventGraphicsSubjectPtr subject = NULL; - int i; - - virDomainObjLock(vm); - - if (VIR_ALLOC(localAddr) < 0) - goto no_memory; - localAddr->family = localFamily; - if (!(localAddr->service = strdup(localService)) || - !(localAddr->node = strdup(localNode))) - goto no_memory; - - if (VIR_ALLOC(remoteAddr) < 0) - goto no_memory; - remoteAddr->family = remoteFamily; - if (!(remoteAddr->service = strdup(remoteService)) || - !(remoteAddr->node = strdup(remoteNode))) - goto no_memory; - - if (VIR_ALLOC(subject) < 0) - goto no_memory; - if (x509dname) { - if (VIR_REALLOC_N(subject->identities, subject->nidentity+1) < 0) - goto no_memory; - if (!(subject->identities[subject->nidentity].type = strdup("x509dname")) || - !(subject->identities[subject->nidentity].name = strdup(x509dname))) - goto no_memory; - subject->nidentity++; - } - if (saslUsername) { - if (VIR_REALLOC_N(subject->identities, subject->nidentity+1) < 0) - goto no_memory; - if (!(subject->identities[subject->nidentity].type = strdup("saslUsername")) || - !(subject->identities[subject->nidentity].name = strdup(saslUsername))) - goto no_memory; - subject->nidentity++; - } - - event = virDomainEventGraphicsNewFromObj(vm, phase, localAddr, remoteAddr, authScheme, subject); - virDomainObjUnlock(vm); - - if (event) { - qemuDriverLock(driver); - qemuDomainEventQueue(driver, event); - qemuDriverUnlock(driver); - } - - return 0; - -no_memory: - virReportOOMError(); - if (localAddr) { - VIR_FREE(localAddr->service); - VIR_FREE(localAddr->node); - VIR_FREE(localAddr); - } - if (remoteAddr) { - VIR_FREE(remoteAddr->service); - VIR_FREE(remoteAddr->node); - VIR_FREE(remoteAddr); - } - if (subject) { - for (i = 0 ; i < subject->nidentity ; i++) { - VIR_FREE(subject->identities[i].type); - VIR_FREE(subject->identities[i].name); - } - VIR_FREE(subject->identities); - VIR_FREE(subject); - } - - return -1; -} - - -static void qemuHandleMonitorDestroy(qemuMonitorPtr mon, - virDomainObjPtr vm) -{ - qemuDomainObjPrivatePtr priv = vm->privateData; - if (priv->mon == mon) - priv->mon = NULL; - virDomainObjUnref(vm); -} - -static qemuMonitorCallbacks monitorCallbacks = { - .destroy = qemuHandleMonitorDestroy, - .eofNotify = qemuHandleMonitorEOF, - .diskSecretLookup = findVolumeQcowPassphrase, - .domainShutdown = qemuHandleDomainShutdown, - .domainStop = qemuHandleDomainStop, - .domainReset = qemuHandleDomainReset, - .domainRTCChange = qemuHandleDomainRTCChange, - .domainWatchdog = qemuHandleDomainWatchdog, - .domainIOError = qemuHandleDomainIOError, - .domainGraphics = qemuHandleDomainGraphics, -}; - -static int -qemuConnectMonitor(struct qemud_driver *driver, virDomainObjPtr vm) -{ - qemuDomainObjPrivatePtr priv = vm->privateData; - int ret = -1; - - if (virSecurityManagerSetSocketLabel(driver->securityManager, vm) < 0) { - VIR_ERROR(_("Failed to set security context for monitor for %s"), - vm->def->name); - goto error; - } - - /* Hold an extra reference because we can't allow 'vm' to be - * deleted while the monitor is active */ - virDomainObjRef(vm); - - priv->mon = qemuMonitorOpen(vm, - priv->monConfig, - priv->monJSON, - &monitorCallbacks); - - if (priv->mon == NULL) - virDomainObjUnref(vm); - - if (virSecurityManagerClearSocketLabel(driver->securityManager, vm) < 0) { - VIR_ERROR(_("Failed to clear security context for monitor for %s"), - vm->def->name); - goto error; - } - - if (priv->mon == NULL) { - VIR_INFO("Failed to connect 
monitor for %s", vm->def->name); - goto error; - } - - - qemuDomainObjEnterMonitorWithDriver(driver, vm); - ret = qemuMonitorSetCapabilities(priv->mon); - qemuDomainObjExitMonitorWithDriver(driver, vm); - -error: - - return ret; -} - -struct virReconnectDomainData { - virConnectPtr conn; - struct qemud_driver *driver; -}; -/* - * Open an existing VM's monitor, re-detect VCPU threads - * and re-reserve the security labels in use - */ -static void -qemuReconnectDomain(void *payload, const char *name ATTRIBUTE_UNUSED, void *opaque) -{ - virDomainObjPtr obj = payload; - struct virReconnectDomainData *data = opaque; - struct qemud_driver *driver = data->driver; - qemuDomainObjPrivatePtr priv; - unsigned long long qemuCmdFlags; - virConnectPtr conn = data->conn; - - virDomainObjLock(obj); - - VIR_DEBUG("Reconnect monitor to %p '%s'", obj, obj->def->name); - - priv = obj->privateData; - - /* Hold an extra reference because we can't allow 'vm' to be - * deleted if qemuConnectMonitor() failed */ - virDomainObjRef(obj); - - /* XXX check PID liveliness & EXE path */ - if (qemuConnectMonitor(driver, obj) < 0) - goto error; - - if (qemuUpdateActivePciHostdevs(driver, obj->def) < 0) { - goto error; - } - - /* XXX we should be persisting the original flags in the XML - * not re-detecting them, since the binary may have changed - * since launch time */ - if (qemuCapsExtractVersionInfo(obj->def->emulator, - NULL, - &qemuCmdFlags) >= 0 && - (qemuCmdFlags & QEMUD_CMD_FLAG_DEVICE)) { - priv->persistentAddrs = 1; - - if (!(priv->pciaddrs = qemuDomainPCIAddressSetCreate(obj->def)) || - qemuAssignDevicePCISlots(obj->def, priv->pciaddrs) < 0) - goto error; - } - - if (virSecurityManagerReserveLabel(driver->securityManager, obj) < 0) - goto error; - - if (qemudVMFiltersInstantiate(conn, obj->def)) - goto error; - - if (obj->def->id >= driver->nextvmid) - driver->nextvmid = obj->def->id + 1; - - if (virDomainObjUnref(obj) > 0) - virDomainObjUnlock(obj); - return; - -error: - if (!virDomainObjIsActive(obj)) { - if (virDomainObjUnref(obj) > 0) - virDomainObjUnlock(obj); - return; - } - - if (virDomainObjUnref(obj) > 0) { - /* We can't get the monitor back, so must kill the VM - * to remove danger of it ending up running twice if - * user tries to start it again later */ - qemudShutdownVMDaemon(driver, obj, 0); - if (!obj->persistent) - virDomainRemoveInactive(&driver->domains, obj); - else - virDomainObjUnlock(obj); - } -} - -/** - * qemudReconnectDomains - * - * Try to re-open the resources for live VMs that we care - * about. 
- */ -static void -qemuReconnectDomains(virConnectPtr conn, struct qemud_driver *driver) -{ - struct virReconnectDomainData data = {conn, driver}; - virHashForEach(driver->domains.objs, qemuReconnectDomain, &data); -} - - -static int -qemuSecurityInit(struct qemud_driver *driver) -{ - virSecurityManagerPtr mgr = virSecurityManagerNew(driver->securityDriverName, - driver->allowDiskFormatProbing); - if (!mgr) - goto error; - - if (driver->privileged) { - virSecurityManagerPtr dac = virSecurityManagerNewDAC(driver->user, - driver->group, - driver->allowDiskFormatProbing, - driver->dynamicOwnership); - if (!dac) - goto error; - - if (!(driver->securityManager = virSecurityManagerNewStack(mgr, - dac))) - goto error; - } else { - driver->securityManager = mgr; - } - - return 0; - -error: - VIR_ERROR0(_("Failed to initialize security drivers")); - virSecurityManagerFree(mgr); - return -1; -} - - -static virCapsPtr -qemuCreateCapabilities(virCapsPtr oldcaps, - struct qemud_driver *driver) -{ - virCapsPtr caps; - - /* Basic host arch / guest machine capabilities */ - if (!(caps = qemuCapsInit(oldcaps))) { - virReportOOMError(); - return NULL; - } - - if (driver->allowDiskFormatProbing) { - caps->defaultDiskDriverName = NULL; - caps->defaultDiskDriverType = NULL; - } else { - caps->defaultDiskDriverName = "qemu"; - caps->defaultDiskDriverType = "raw"; - } - - qemuDomainSetPrivateDataHooks(caps); - qemuDomainSetNamespaceHooks(caps); - - if (virGetHostUUID(caps->host.host_uuid)) { - qemuReportError(VIR_ERR_INTERNAL_ERROR, - "%s", _("cannot get the host uuid")); - goto err_exit; - } - - /* Security driver data */ - const char *doi, *model; - - doi = virSecurityManagerGetDOI(driver->securityManager); - model = virSecurityManagerGetModel(driver->securityManager); - if (STRNEQ(model, "none")) { - if (!(caps->host.secModel.model = strdup(model))) - goto no_memory; - if (!(caps->host.secModel.doi = strdup(doi))) - goto no_memory; - } - - VIR_DEBUG("Initialized caps for security driver \"%s\" with " - "DOI \"%s\"", model, doi); - - return caps; - -no_memory: - virReportOOMError(); -err_exit: - virCapabilitiesFree(caps); - return NULL; -} - -static void qemuDomainSnapshotLoad(void *payload, - const char *name ATTRIBUTE_UNUSED, - void *data) -{ - virDomainObjPtr vm = (virDomainObjPtr)payload; - char *baseDir = (char *)data; - char *snapDir = NULL; - DIR *dir = NULL; - struct dirent *entry; - char *xmlStr; - int ret; - char *fullpath; - virDomainSnapshotDefPtr def = NULL; - char ebuf[1024]; - - virDomainObjLock(vm); - if (virAsprintf(&snapDir, "%s/%s", baseDir, vm->def->name) < 0) { - VIR_ERROR(_("Failed to allocate memory for snapshot directory for domain %s"), - vm->def->name); - goto cleanup; - } - - VIR_INFO("Scanning for snapshots for domain %s in %s", vm->def->name, - snapDir); - - if (!(dir = opendir(snapDir))) { - if (errno != ENOENT) - VIR_ERROR(_("Failed to open snapshot directory %s for domain %s: %s"), - snapDir, vm->def->name, - virStrerror(errno, ebuf, sizeof(ebuf))); - goto cleanup; - } - - while ((entry = readdir(dir))) { - if (entry->d_name[0] == '.') - continue; - - /* NB: ignoring errors, so one malformed config doesn't - kill the whole process */ - VIR_INFO("Loading snapshot file '%s'", entry->d_name); - - if (virAsprintf(&fullpath, "%s/%s", snapDir, entry->d_name) < 0) { - VIR_ERROR0(_("Failed to allocate memory for path")); - continue; - } - - ret = virFileReadAll(fullpath, 1024*1024*1, &xmlStr); - if (ret < 0) { - /* Nothing we can do here, skip this one */ - VIR_ERROR(_("Failed to read 
snapshot file %s: %s"), fullpath, - virStrerror(errno, ebuf, sizeof(ebuf))); - VIR_FREE(fullpath); - continue; - } - - def = virDomainSnapshotDefParseString(xmlStr, 0); - if (def == NULL) { - /* Nothing we can do here, skip this one */ - VIR_ERROR(_("Failed to parse snapshot XML from file '%s'"), fullpath); - VIR_FREE(fullpath); - VIR_FREE(xmlStr); - continue; - } - - virDomainSnapshotAssignDef(&vm->snapshots, def); - - VIR_FREE(fullpath); - VIR_FREE(xmlStr); - } - - /* FIXME: qemu keeps internal track of snapshots. We can get access - * to this info via the "info snapshots" monitor command for running - * domains, or via "qemu-img snapshot -l" for shutoff domains. It would - * be nice to update our internal state based on that, but there is a - * a problem. qemu doesn't track all of the same metadata that we do. - * In particular we wouldn't be able to fill in the <parent>, which is - * pretty important in our metadata. - */ - - virResetLastError(); - -cleanup: - if (dir) - closedir(dir); - VIR_FREE(snapDir); - virDomainObjUnlock(vm); -} - -/** - * qemudStartup: - * - * Initialization function for the QEmu daemon - */ -static int -qemudStartup(int privileged) { - char *base = NULL; - char *driverConf = NULL; - int rc; - virConnectPtr conn = NULL; - - if (VIR_ALLOC(qemu_driver) < 0) - return -1; - - if (virMutexInit(&qemu_driver->lock) < 0) { - VIR_ERROR0(_("cannot initialize mutex")); - VIR_FREE(qemu_driver); - return -1; - } - qemuDriverLock(qemu_driver); - qemu_driver->privileged = privileged; - - /* Don't have a dom0 so start from 1 */ - qemu_driver->nextvmid = 1; - - if (virDomainObjListInit(&qemu_driver->domains) < 0) - goto out_of_memory; - - /* Init callback list */ - if (VIR_ALLOC(qemu_driver->domainEventCallbacks) < 0) - goto out_of_memory; - if (!(qemu_driver->domainEventQueue = virDomainEventQueueNew())) - goto out_of_memory; - - if ((qemu_driver->domainEventTimer = - virEventAddTimeout(-1, qemuDomainEventFlush, qemu_driver, NULL)) < 0) - goto error; - - /* Allocate bitmap for vnc port reservation */ - if ((qemu_driver->reservedVNCPorts = - virBitmapAlloc(QEMU_VNC_PORT_MAX - QEMU_VNC_PORT_MIN)) == NULL) - goto out_of_memory; - - /* read the host sysinfo */ - if (privileged) - qemu_driver->hostsysinfo = virSysinfoRead(); - - if (privileged) { - if (virAsprintf(&qemu_driver->logDir, - "%s/log/libvirt/qemu", LOCALSTATEDIR) == -1) - goto out_of_memory; - - if ((base = strdup (SYSCONFDIR "/libvirt")) == NULL) - goto out_of_memory; - - if (virAsprintf(&qemu_driver->stateDir, - "%s/run/libvirt/qemu", LOCALSTATEDIR) == -1) - goto out_of_memory; - - if (virAsprintf(&qemu_driver->libDir, - "%s/lib/libvirt/qemu", LOCALSTATEDIR) == -1) - goto out_of_memory; - - if (virAsprintf(&qemu_driver->cacheDir, - "%s/cache/libvirt/qemu", LOCALSTATEDIR) == -1) - goto out_of_memory; - if (virAsprintf(&qemu_driver->saveDir, - "%s/lib/libvirt/qemu/save", LOCALSTATEDIR) == -1) - goto out_of_memory; - if (virAsprintf(&qemu_driver->snapshotDir, - "%s/lib/libvirt/qemu/snapshot", LOCALSTATEDIR) == -1) - goto out_of_memory; - if (virAsprintf(&qemu_driver->autoDumpPath, - "%s/lib/libvirt/qemu/dump", LOCALSTATEDIR) == -1) - goto out_of_memory; - } else { - uid_t uid = geteuid(); - char *userdir = virGetUserDirectory(uid); - if (!userdir) - goto error; - - if (virAsprintf(&qemu_driver->logDir, - "%s/.libvirt/qemu/log", userdir) == -1) { - VIR_FREE(userdir); - goto out_of_memory; - } - - if (virAsprintf(&base, "%s/.libvirt", userdir) == -1) { - VIR_FREE(userdir); - goto out_of_memory; - } - VIR_FREE(userdir); - - 
if (virAsprintf(&qemu_driver->stateDir, "%s/qemu/run", base) == -1) - goto out_of_memory; - if (virAsprintf(&qemu_driver->libDir, "%s/qemu/lib", base) == -1) - goto out_of_memory; - if (virAsprintf(&qemu_driver->cacheDir, "%s/qemu/cache", base) == -1) - goto out_of_memory; - if (virAsprintf(&qemu_driver->saveDir, "%s/qemu/save", base) == -1) - goto out_of_memory; - if (virAsprintf(&qemu_driver->snapshotDir, "%s/qemu/snapshot", base) == -1) - goto out_of_memory; - if (virAsprintf(&qemu_driver->autoDumpPath, "%s/qemu/dump", base) == -1) - goto out_of_memory; - } - - if (virFileMakePath(qemu_driver->stateDir) != 0) { - char ebuf[1024]; - VIR_ERROR(_("Failed to create state dir '%s': %s"), - qemu_driver->stateDir, virStrerror(errno, ebuf, sizeof ebuf)); - goto error; - } - if (virFileMakePath(qemu_driver->libDir) != 0) { - char ebuf[1024]; - VIR_ERROR(_("Failed to create lib dir '%s': %s"), - qemu_driver->libDir, virStrerror(errno, ebuf, sizeof ebuf)); - goto error; - } - if (virFileMakePath(qemu_driver->cacheDir) != 0) { - char ebuf[1024]; - VIR_ERROR(_("Failed to create cache dir '%s': %s"), - qemu_driver->cacheDir, virStrerror(errno, ebuf, sizeof ebuf)); - goto error; - } - if (virFileMakePath(qemu_driver->saveDir) != 0) { - char ebuf[1024]; - VIR_ERROR(_("Failed to create save dir '%s': %s"), - qemu_driver->saveDir, virStrerror(errno, ebuf, sizeof ebuf)); - goto error; - } - if (virFileMakePath(qemu_driver->snapshotDir) != 0) { - char ebuf[1024]; - VIR_ERROR(_("Failed to create save dir '%s': %s"), - qemu_driver->snapshotDir, virStrerror(errno, ebuf, sizeof ebuf)); - goto error; - } - if (virFileMakePath(qemu_driver->autoDumpPath) != 0) { - char ebuf[1024]; - VIR_ERROR(_("Failed to create dump dir '%s': %s"), - qemu_driver->autoDumpPath, virStrerror(errno, ebuf, sizeof ebuf)); - goto error; - } - - /* Configuration paths are either ~/.libvirt/qemu/... (session) or - * /etc/libvirt/qemu/... (system). 
- */ - if (virAsprintf(&driverConf, "%s/qemu.conf", base) < 0 || - virAsprintf(&qemu_driver->configDir, "%s/qemu", base) < 0 || - virAsprintf(&qemu_driver->autostartDir, "%s/qemu/autostart", base) < 0) - goto out_of_memory; - - VIR_FREE(base); - - rc = virCgroupForDriver("qemu", &qemu_driver->cgroup, privileged, 1); - if (rc < 0) { - char buf[1024]; - VIR_WARN("Unable to create cgroup for driver: %s", - virStrerror(-rc, buf, sizeof(buf))); - } - - if (qemudLoadDriverConfig(qemu_driver, driverConf) < 0) { - goto error; - } - VIR_FREE(driverConf); - - if (qemuSecurityInit(qemu_driver) < 0) - goto error; - - if ((qemu_driver->caps = qemuCreateCapabilities(NULL, - qemu_driver)) == NULL) - goto error; - - if ((qemu_driver->activePciHostdevs = pciDeviceListNew()) == NULL) - goto error; - - if (privileged) { - if (chown(qemu_driver->libDir, qemu_driver->user, qemu_driver->group) < 0) { - virReportSystemError(errno, - _("unable to set ownership of '%s' to user %d:%d"), - qemu_driver->libDir, qemu_driver->user, qemu_driver->group); - goto error; - } - if (chown(qemu_driver->cacheDir, qemu_driver->user, qemu_driver->group) < 0) { - virReportSystemError(errno, - _("unable to set ownership of '%s' to %d:%d"), - qemu_driver->cacheDir, qemu_driver->user, qemu_driver->group); - goto error; - } - if (chown(qemu_driver->saveDir, qemu_driver->user, qemu_driver->group) < 0) { - virReportSystemError(errno, - _("unable to set ownership of '%s' to %d:%d"), - qemu_driver->saveDir, qemu_driver->user, qemu_driver->group); - goto error; - } - if (chown(qemu_driver->snapshotDir, qemu_driver->user, qemu_driver->group) < 0) { - virReportSystemError(errno, - _("unable to set ownership of '%s' to %d:%d"), - qemu_driver->snapshotDir, qemu_driver->user, qemu_driver->group); - goto error; - } - } - - /* If hugetlbfs is present, then we need to create a sub-directory within - * it, since we can't assume the root mount point has permissions that - * will let our spawned QEMU instances use it. - * - * NB the check for '/', since user may config "" to disable hugepages - * even when mounted - */ - if (qemu_driver->hugetlbfs_mount && - qemu_driver->hugetlbfs_mount[0] == '/') { - char *mempath = NULL; - if (virAsprintf(&mempath, "%s/libvirt/qemu", qemu_driver->hugetlbfs_mount) < 0) - goto out_of_memory; - - if ((rc = virFileMakePath(mempath)) != 0) { - virReportSystemError(rc, - _("unable to create hugepage path %s"), mempath); - VIR_FREE(mempath); - goto error; - } - if (qemu_driver->privileged && - chown(mempath, qemu_driver->user, qemu_driver->group) < 0) { - virReportSystemError(errno, - _("unable to set ownership on %s to %d:%d"), - mempath, qemu_driver->user, qemu_driver->group); - VIR_FREE(mempath); - goto error; - } - - qemu_driver->hugepage_path = mempath; - } - - /* Get all the running persistent or transient configs first */ - if (virDomainLoadAllConfigs(qemu_driver->caps, - &qemu_driver->domains, - qemu_driver->stateDir, - NULL, - 1, NULL, NULL) < 0) - goto error; - - conn = virConnectOpen(qemu_driver->privileged ? 
- "qemu:///system" : - "qemu:///session"); - - qemuReconnectDomains(conn, qemu_driver); - - /* Then inactive persistent configs */ - if (virDomainLoadAllConfigs(qemu_driver->caps, - &qemu_driver->domains, - qemu_driver->configDir, - qemu_driver->autostartDir, - 0, NULL, NULL) < 0) - goto error; - - - virHashForEach(qemu_driver->domains.objs, qemuDomainSnapshotLoad, - qemu_driver->snapshotDir); - - qemuDriverUnlock(qemu_driver); - - qemudAutostartConfigs(qemu_driver); - - qemu_driver->workerPool = virThreadPoolNew(0, 1, processWatchdogEvent, qemu_driver); - if (!qemu_driver->workerPool) - goto error; - - if (conn) - virConnectClose(conn); - - return 0; - -out_of_memory: - virReportOOMError(); -error: - if (qemu_driver) - qemuDriverUnlock(qemu_driver); - if (conn) - virConnectClose(conn); - VIR_FREE(base); - VIR_FREE(driverConf); - qemudShutdown(); - return -1; -} - -static void qemudNotifyLoadDomain(virDomainObjPtr vm, int newVM, void *opaque) -{ - struct qemud_driver *driver = opaque; - - if (newVM) { - virDomainEventPtr event = - virDomainEventNewFromObj(vm, - VIR_DOMAIN_EVENT_DEFINED, - VIR_DOMAIN_EVENT_DEFINED_ADDED); - if (event) - qemuDomainEventQueue(driver, event); - } -} - -/** - * qemudReload: - * - * Function to restart the QEmu daemon, it will recheck the configuration - * files and update its state and the networking - */ -static int -qemudReload(void) { - if (!qemu_driver) - return 0; - - qemuDriverLock(qemu_driver); - virDomainLoadAllConfigs(qemu_driver->caps, - &qemu_driver->domains, - qemu_driver->configDir, - qemu_driver->autostartDir, - 0, qemudNotifyLoadDomain, qemu_driver); - qemuDriverUnlock(qemu_driver); - - qemudAutostartConfigs(qemu_driver); - - return 0; -} - -/** - * qemudActive: - * - * Checks if the QEmu daemon is active, i.e. 
has an active domain or - * an active network - * - * Returns 1 if active, 0 otherwise - */ -static int -qemudActive(void) { - int active = 0; - - if (!qemu_driver) - return 0; - - /* XXX having to iterate here is not great because it requires many locks */ - qemuDriverLock(qemu_driver); - active = virDomainObjListNumOfDomains(&qemu_driver->domains, 1); - qemuDriverUnlock(qemu_driver); - return active; -} - -/** - * qemudShutdown: - * - * Shutdown the QEmu daemon, it will stop all active domains and networks - */ -static int -qemudShutdown(void) { - int i; - - if (!qemu_driver) - return -1; - - qemuDriverLock(qemu_driver); - pciDeviceListFree(qemu_driver->activePciHostdevs); - virCapabilitiesFree(qemu_driver->caps); - - virDomainObjListDeinit(&qemu_driver->domains); - virBitmapFree(qemu_driver->reservedVNCPorts); - - virSysinfoDefFree(qemu_driver->hostsysinfo); - - VIR_FREE(qemu_driver->configDir); - VIR_FREE(qemu_driver->autostartDir); - VIR_FREE(qemu_driver->logDir); - VIR_FREE(qemu_driver->stateDir); - VIR_FREE(qemu_driver->libDir); - VIR_FREE(qemu_driver->cacheDir); - VIR_FREE(qemu_driver->saveDir); - VIR_FREE(qemu_driver->snapshotDir); - VIR_FREE(qemu_driver->autoDumpPath); - VIR_FREE(qemu_driver->vncTLSx509certdir); - VIR_FREE(qemu_driver->vncListen); - VIR_FREE(qemu_driver->vncPassword); - VIR_FREE(qemu_driver->vncSASLdir); - VIR_FREE(qemu_driver->spiceTLSx509certdir); - VIR_FREE(qemu_driver->spiceListen); - VIR_FREE(qemu_driver->spicePassword); - VIR_FREE(qemu_driver->hugetlbfs_mount); - VIR_FREE(qemu_driver->hugepage_path); - VIR_FREE(qemu_driver->saveImageFormat); - VIR_FREE(qemu_driver->dumpImageFormat); - - virSecurityManagerFree(qemu_driver->securityManager); - - ebtablesContextFree(qemu_driver->ebtables); - - if (qemu_driver->cgroupDeviceACL) { - for (i = 0 ; qemu_driver->cgroupDeviceACL[i] != NULL ; i++) - VIR_FREE(qemu_driver->cgroupDeviceACL[i]); - VIR_FREE(qemu_driver->cgroupDeviceACL); - } - - /* Free domain callback list */ - virDomainEventCallbackListFree(qemu_driver->domainEventCallbacks); - virDomainEventQueueFree(qemu_driver->domainEventQueue); - - if (qemu_driver->domainEventTimer != -1) - virEventRemoveTimeout(qemu_driver->domainEventTimer); - - if (qemu_driver->brctl) - brShutdown(qemu_driver->brctl); - - virCgroupFree(&qemu_driver->cgroup); - - qemuDriverUnlock(qemu_driver); - virMutexDestroy(&qemu_driver->lock); - virThreadPoolFree(qemu_driver->workerPool); - VIR_FREE(qemu_driver); - - return 0; -} - -typedef int qemuLogHandleOutput(virDomainObjPtr vm, - const char *output, - int fd); - -/* - * Returns -1 for error, 0 on success - */ -static int -qemudReadLogOutput(virDomainObjPtr vm, - int fd, - char *buf, - size_t buflen, - qemuLogHandleOutput func, - const char *what, - int timeout) -{ - int retries = (timeout*10); - int got = 0; - buf[0] = '\0'; - - while (retries) { - ssize_t func_ret, ret; - int isdead = 0; - - func_ret = func(vm, buf, fd); - - if (kill(vm->pid, 0) == -1 && errno == ESRCH) - isdead = 1; - - /* Any failures should be detected before we read the log, so we - * always have something useful to report on failure. 
*/ - ret = saferead(fd, buf+got, buflen-got-1); - if (ret < 0) { - virReportSystemError(errno, - _("Failure while reading %s log output"), - what); - return -1; - } - - got += ret; - buf[got] = '\0'; - if (got == buflen-1) { - qemuReportError(VIR_ERR_INTERNAL_ERROR, - _("Out of space while reading %s log output: %s"), - what, buf); - return -1; - } - - if (isdead) { - qemuReportError(VIR_ERR_INTERNAL_ERROR, - _("Process exited while reading %s log output: %s"), - what, buf); - return -1; - } - - if (func_ret <= 0) - return func_ret; - - usleep(100*1000); - retries--; - } - - qemuReportError(VIR_ERR_INTERNAL_ERROR, - _("Timed out while reading %s log output: %s"), - what, buf); - return -1; -} - - -/* - * Look at a chunk of data from the QEMU stdout logs and try to - * find a TTY device, as indicated by a line like - * - * char device redirected to /dev/pts/3 - * - * Returns -1 for error, 0 success, 1 continue reading - */ -static int -qemudExtractTTYPath(const char *haystack, - size_t *offset, - char **path) -{ - static const char needle[] = "char device redirected to"; - char *tmp, *dev; - - VIR_FREE(*path); - /* First look for our magic string */ - if (!(tmp = strstr(haystack + *offset, needle))) { - return 1; - } - tmp += sizeof(needle); - dev = tmp; - - /* - * And look for first whitespace character and nul terminate - * to mark end of the pty path - */ - while (*tmp) { - if (c_isspace(*tmp)) { - *path = strndup(dev, tmp-dev); - if (*path == NULL) { - virReportOOMError(); - return -1; - } - - /* ... now further update offset till we get EOL */ - *offset = tmp - haystack; - return 0; - } - tmp++; - } - - /* - * We found a path, but didn't find any whitespace, - * so it must be still incomplete - we should at - * least see a \n - indicate that we want to carry - * on trying again - */ - return 1; -} - -static int -qemudFindCharDevicePTYsMonitor(virDomainObjPtr vm, - virHashTablePtr paths) -{ - int i; - -#define LOOKUP_PTYS(array, arraylen, idprefix) \ - for (i = 0 ; i < (arraylen) ; i++) { \ - virDomainChrDefPtr chr = (array)[i]; \ - if (chr->source.type == VIR_DOMAIN_CHR_TYPE_PTY) { \ - char id[16]; \ - \ - if (snprintf(id, sizeof(id), idprefix "%i", i) >= sizeof(id)) \ - return -1; \ - \ - const char *path = (const char *) virHashLookup(paths, id); \ - if (path == NULL) { \ - if (chr->source.data.file.path == NULL) { \ - /* neither the log output nor 'info chardev' had a */ \ - /* pty path for this chardev, report an error */ \ - qemuReportError(VIR_ERR_INTERNAL_ERROR, \ - _("no assigned pty for device %s"), id); \ - return -1; \ - } else { \ - /* 'info chardev' had no pty path for this chardev, */\ - /* but the log output had, so we're fine */ \ - continue; \ - } \ - } \ - \ - VIR_FREE(chr->source.data.file.path); \ - chr->source.data.file.path = strdup(path); \ - \ - if (chr->source.data.file.path == NULL) { \ - virReportOOMError(); \ - return -1; \ - } \ - } \ - } - - LOOKUP_PTYS(vm->def->serials, vm->def->nserials, "serial"); - LOOKUP_PTYS(vm->def->parallels, vm->def->nparallels, "parallel"); - LOOKUP_PTYS(vm->def->channels, vm->def->nchannels, "channel"); - if (vm->def->console) - LOOKUP_PTYS(&vm->def->console, 1, "console"); -#undef LOOKUP_PTYS - - return 0; -} - -static int -qemudFindCharDevicePTYs(virDomainObjPtr vm, - const char *output, - int fd ATTRIBUTE_UNUSED) -{ - size_t offset = 0; - int ret, i; - - /* The order in which QEMU prints out the PTY paths is - the order in which it procsses its serial and parallel - device args. This code must match that ordering.... 
*/ - - /* first comes the serial devices */ - for (i = 0 ; i < vm->def->nserials ; i++) { - virDomainChrDefPtr chr = vm->def->serials[i]; - if (chr->source.type == VIR_DOMAIN_CHR_TYPE_PTY) { - if ((ret = qemudExtractTTYPath(output, &offset, - &chr->source.data.file.path)) != 0) - return ret; - } - } - - /* then the parallel devices */ - for (i = 0 ; i < vm->def->nparallels ; i++) { - virDomainChrDefPtr chr = vm->def->parallels[i]; - if (chr->source.type == VIR_DOMAIN_CHR_TYPE_PTY) { - if ((ret = qemudExtractTTYPath(output, &offset, - &chr->source.data.file.path)) != 0) - return ret; - } - } - - /* then the channel devices */ - for (i = 0 ; i < vm->def->nchannels ; i++) { - virDomainChrDefPtr chr = vm->def->channels[i]; - if (chr->source.type == VIR_DOMAIN_CHR_TYPE_PTY) { - if ((ret = qemudExtractTTYPath(output, &offset, - &chr->source.data.file.path)) != 0) - return ret; - } - } - - return 0; -} - -static void qemudFreePtyPath(void *payload, const char *name ATTRIBUTE_UNUSED) -{ - VIR_FREE(payload); -} - -static void -qemuReadLogFD(int logfd, char *buf, int maxlen, int off) -{ - int ret; - char *tmpbuf = buf + off; - - ret = saferead(logfd, tmpbuf, maxlen - off - 1); - if (ret < 0) { - ret = 0; - } - - tmpbuf[ret] = '\0'; -} - -static int -qemudWaitForMonitor(struct qemud_driver* driver, - virDomainObjPtr vm, off_t pos) -{ - char buf[4096] = ""; /* Plenty of space to get startup greeting */ - int logfd; - int ret = -1; - virHashTablePtr paths = NULL; - - if ((logfd = qemudLogReadFD(driver->logDir, vm->def->name, pos)) < 0) - return -1; - - if (qemudReadLogOutput(vm, logfd, buf, sizeof(buf), - qemudFindCharDevicePTYs, - "console", 30) < 0) - goto closelog; - - VIR_DEBUG("Connect monitor to %p '%s'", vm, vm->def->name); - if (qemuConnectMonitor(driver, vm) < 0) { - goto cleanup; - } - - /* Try to get the pty path mappings again via the monitor. This is much more - * reliable if it's available. - * Note that the monitor itself can be on a pty, so we still need to try the - * log output method. 
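In other words: monitor data wins whenever 'info chardev' reports a path, and log-scraped paths are only a fallback. A toy illustration of that precedence rule, with a flat array standing in for the virHash table (all names invented):

#include <stdio.h>
#include <string.h>

struct ptymap { const char *id, *path; };   /* stand-in for the hash */

static const char *
lookup(const struct ptymap *m, size_t n, const char *id)
{
    size_t i;

    for (i = 0; i < n; i++)
        if (strcmp(m[i].id, id) == 0)
            return m[i].path;
    return NULL;
}

int main(void)
{
    const struct ptymap paths[] = {
        { "serial0", "/dev/pts/3" },
        { "channel0", "/dev/pts/5" },
    };
    char id[16];

    /* IDs are synthesized the same way the LOOKUP_PTYS macro does it:
     * type prefix plus index */
    snprintf(id, sizeof(id), "serial%d", 0);
    const char *p = lookup(paths, 2, id);
    printf("%s -> %s\n", id, p ? p : "(fall back to the log output)");
    return 0;
}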
*/ - paths = virHashCreate(0); - if (paths == NULL) { - virReportOOMError(); - goto cleanup; - } - - qemuDomainObjEnterMonitorWithDriver(driver, vm); - qemuDomainObjPrivatePtr priv = vm->privateData; - ret = qemuMonitorGetPtyPaths(priv->mon, paths); - qemuDomainObjExitMonitorWithDriver(driver, vm); - - VIR_DEBUG("qemuMonitorGetPtyPaths returned %i", ret); - if (ret == 0) { - ret = qemudFindCharDevicePTYsMonitor(vm, paths); - } - -cleanup: - if (paths) { - virHashFree(paths, qemudFreePtyPath); - } - - if (kill(vm->pid, 0) == -1 && errno == ESRCH) { - /* VM is dead, any other error raised in the interim is probably - * not as important as the qemu cmdline output */ - qemuReadLogFD(logfd, buf, sizeof(buf), strlen(buf)); - qemuReportError(VIR_ERR_INTERNAL_ERROR, - _("process exited while connecting to monitor: %s"), - buf); - ret = -1; - } - -closelog: - if (VIR_CLOSE(logfd) < 0) { - char ebuf[4096]; - VIR_WARN("Unable to close logfile: %s", - virStrerror(errno, ebuf, sizeof ebuf)); - } - - return ret; -} - -static int -qemuDetectVcpuPIDs(struct qemud_driver *driver, - virDomainObjPtr vm) { - pid_t *cpupids = NULL; - int ncpupids; - qemuDomainObjPrivatePtr priv = vm->privateData; - - if (vm->def->virtType != VIR_DOMAIN_VIRT_KVM) { - priv->nvcpupids = 1; - if (VIR_ALLOC_N(priv->vcpupids, priv->nvcpupids) < 0) { - virReportOOMError(); - return -1; - } - priv->vcpupids[0] = vm->pid; - return 0; - } - - /* What follows is now all KVM specific */ - - qemuDomainObjEnterMonitorWithDriver(driver, vm); - if ((ncpupids = qemuMonitorGetCPUInfo(priv->mon, &cpupids)) < 0) { - qemuDomainObjExitMonitorWithDriver(driver, vm); - return -1; - } - qemuDomainObjExitMonitorWithDriver(driver, vm); - - /* Treat failure to get VCPU<->PID mapping as non-fatal */ - if (ncpupids == 0) - return 0; - - if (ncpupids != vm->def->vcpus) { - qemuReportError(VIR_ERR_INTERNAL_ERROR, - _("got wrong number of vCPU pids from QEMU monitor. " - "got %d, wanted %d"), - ncpupids, vm->def->vcpus); - VIR_FREE(cpupids); - return -1; - } - - priv->nvcpupids = ncpupids; - priv->vcpupids = cpupids; - return 0; -} - -/* - * To be run between fork/exec of QEMU only - */ -static int -qemudInitCpuAffinity(virDomainObjPtr vm) -{ - int i, hostcpus, maxcpu = QEMUD_CPUMASK_LEN; - virNodeInfo nodeinfo; - unsigned char *cpumap; - int cpumaplen; - - DEBUG0("Setting CPU affinity"); - - if (nodeGetInfo(NULL, &nodeinfo) < 0) - return -1; - - /* setaffinity fails if you set bits for CPUs which - * aren't present, so we have to limit ourselves */ - hostcpus = VIR_NODEINFO_MAXCPUS(nodeinfo); - if (maxcpu > hostcpus) - maxcpu = hostcpus; - - cpumaplen = VIR_CPU_MAPLEN(maxcpu); - if (VIR_ALLOC_N(cpumap, cpumaplen) < 0) { - virReportOOMError(); - return -1; - } - - if (vm->def->cpumask) { - /* XXX why don't we keep 'cpumask' in the libvirt cpumap - * format to start with ?!?! */ - for (i = 0 ; i < maxcpu && i < vm->def->cpumasklen ; i++) - if (vm->def->cpumask[i]) - VIR_USE_CPU(cpumap, i); - } else { - /* You may think this is redundant, but we can't assume libvirtd - * itself is running on all pCPUs, so we need to explicitly set - * the spawned QEMU instance to all pCPUs if no map is given in - * its config file */ - for (i = 0 ; i < maxcpu ; i++) - VIR_USE_CPU(cpumap, i); - } - - /* We are presuming we are running between fork/exec of QEMU - * so use '0' to indicate our own process ID.
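Outside libvirt's cpumap helpers, the same pin-to-every-CPU fallback looks like this with raw Linux calls (standalone sketch; hosts with more than CPU_SETSIZE processors need dynamically sized sets, which the helpers above handle):

#define _GNU_SOURCE
#include <sched.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
    long ncpus = sysconf(_SC_NPROCESSORS_ONLN);
    cpu_set_t mask;
    long i;

    CPU_ZERO(&mask);
    for (i = 0; i < ncpus; i++)
        CPU_SET(i, &mask);

    /* pid 0 means "self": safe between fork and exec because no
     * other threads exist yet */
    if (sched_setaffinity(0, sizeof(mask), &mask) < 0) {
        perror("sched_setaffinity");
        return 1;
    }
    printf("pinned self to %ld CPUs\n", ncpus);
    return 0;
}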
No threads are - * running at this point - */ - if (virProcessInfoSetAffinity(0, /* Self */ - cpumap, cpumaplen, maxcpu) < 0) { - VIR_FREE(cpumap); - return -1; - } - VIR_FREE(cpumap); - - return 0; -} - - -static int -qemuInitPasswords(virConnectPtr conn, - struct qemud_driver *driver, - virDomainObjPtr vm, - unsigned long long qemuCmdFlags) { - int ret = 0; - qemuDomainObjPrivatePtr priv = vm->privateData; - - if (vm->def->ngraphics == 1) { - if (vm->def->graphics[0]->type == VIR_DOMAIN_GRAPHICS_TYPE_VNC) { - ret = qemuDomainChangeGraphicsPasswords(driver, vm, - VIR_DOMAIN_GRAPHICS_TYPE_VNC, - &vm->def->graphics[0]->data.vnc.auth, - driver->vncPassword); - } else if (vm->def->graphics[0]->type == VIR_DOMAIN_GRAPHICS_TYPE_SPICE) { - ret = qemuDomainChangeGraphicsPasswords(driver, vm, - VIR_DOMAIN_GRAPHICS_TYPE_SPICE, - &vm->def->graphics[0]->data.spice.auth, - driver->spicePassword); - } - } - - if (ret < 0) - goto cleanup; - - if (qemuCmdFlags & QEMUD_CMD_FLAG_DEVICE) { - int i; - - for (i = 0 ; i < vm->def->ndisks ; i++) { - char *secret; - size_t secretLen; - - if (!vm->def->disks[i]->encryption || - !vm->def->disks[i]->src) - continue; - - if (getVolumeQcowPassphrase(conn, - vm->def->disks[i], - &secret, &secretLen) < 0) - goto cleanup; - - qemuDomainObjEnterMonitorWithDriver(driver, vm); - ret = qemuMonitorSetDrivePassphrase(priv->mon, - vm->def->disks[i]->info.alias, - secret); - VIR_FREE(secret); - qemuDomainObjExitMonitorWithDriver(driver, vm); - if (ret < 0) - goto cleanup; - } - } - -cleanup: - return ret; -} - - -#define QEMU_PCI_VENDOR_INTEL 0x8086 -#define QEMU_PCI_VENDOR_LSI_LOGIC 0x1000 -#define QEMU_PCI_VENDOR_REDHAT 0x1af4 -#define QEMU_PCI_VENDOR_CIRRUS 0x1013 -#define QEMU_PCI_VENDOR_REALTEK 0x10ec -#define QEMU_PCI_VENDOR_AMD 0x1022 -#define QEMU_PCI_VENDOR_ENSONIQ 0x1274 -#define QEMU_PCI_VENDOR_VMWARE 0x15ad -#define QEMU_PCI_VENDOR_QEMU 0x1234 - -#define QEMU_PCI_PRODUCT_DISK_VIRTIO 0x1001 - -#define QEMU_PCI_PRODUCT_BALLOON_VIRTIO 0x1002 - -#define QEMU_PCI_PRODUCT_NIC_NE2K 0x8029 -#define QEMU_PCI_PRODUCT_NIC_PCNET 0x2000 -#define QEMU_PCI_PRODUCT_NIC_RTL8139 0x8139 -#define QEMU_PCI_PRODUCT_NIC_E1000 0x100E -#define QEMU_PCI_PRODUCT_NIC_VIRTIO 0x1000 - -#define QEMU_PCI_PRODUCT_VGA_CIRRUS 0x00b8 -#define QEMU_PCI_PRODUCT_VGA_VMWARE 0x0405 -#define QEMU_PCI_PRODUCT_VGA_STDVGA 0x1111 - -#define QEMU_PCI_PRODUCT_AUDIO_AC97 0x2415 -#define QEMU_PCI_PRODUCT_AUDIO_ES1370 0x5000 - -#define QEMU_PCI_PRODUCT_CONTROLLER_PIIX 0x7010 -#define QEMU_PCI_PRODUCT_CONTROLLER_LSI 0x0012 - -#define QEMU_PCI_PRODUCT_WATCHDOG_I63000ESB 0x25ab - -static int -qemuAssignNextPCIAddress(virDomainDeviceInfo *info, - int vendor, - int product, - qemuMonitorPCIAddress *addrs, - int naddrs) -{ - int found = 0; - int i; - - VIR_DEBUG("Look for %x:%x out of %d", vendor, product, naddrs); - - for (i = 0 ; (i < naddrs) && !found; i++) { - VIR_DEBUG("Maybe %x:%x", addrs[i].vendor, addrs[i].product); - if (addrs[i].vendor == vendor && - addrs[i].product == product) { - VIR_DEBUG("Match %d", i); - found = 1; - break; - } - } - if (!found) { - return -1; - } - - /* Blank it out so this device isn't matched again */ - addrs[i].vendor = 0; - addrs[i].product = 0; - - if (info->type == VIR_DOMAIN_DEVICE_ADDRESS_TYPE_NONE) - info->type = VIR_DOMAIN_DEVICE_ADDRESS_TYPE_PCI; - - if (info->type == VIR_DOMAIN_DEVICE_ADDRESS_TYPE_PCI) { - info->addr.pci.domain = addrs[i].addr.domain; - info->addr.pci.bus = addrs[i].addr.bus; - info->addr.pci.slot = addrs[i].addr.slot; - info->addr.pci.function = 
addrs[i].addr.function; - } - - return 0; -} - -static int -qemuGetPCIDiskVendorProduct(virDomainDiskDefPtr def, - unsigned *vendor, - unsigned *product) -{ - switch (def->bus) { - case VIR_DOMAIN_DISK_BUS_VIRTIO: - *vendor = QEMU_PCI_VENDOR_REDHAT; - *product = QEMU_PCI_PRODUCT_DISK_VIRTIO; - break; - - default: - return -1; - } - - return 0; -} - -static int -qemuGetPCINetVendorProduct(virDomainNetDefPtr def, - unsigned *vendor, - unsigned *product) -{ - if (!def->model) - return -1; - - if (STREQ(def->model, "ne2k_pci")) { - *vendor = QEMU_PCI_VENDOR_REALTEK; - *product = QEMU_PCI_PRODUCT_NIC_NE2K; - } else if (STREQ(def->model, "pcnet")) { - *vendor = QEMU_PCI_VENDOR_AMD; - *product = QEMU_PCI_PRODUCT_NIC_PCNET; - } else if (STREQ(def->model, "rtl8139")) { - *vendor = QEMU_PCI_VENDOR_REALTEK; - *product = QEMU_PCI_PRODUCT_NIC_RTL8139; - } else if (STREQ(def->model, "e1000")) { - *vendor = QEMU_PCI_VENDOR_INTEL; - *product = QEMU_PCI_PRODUCT_NIC_E1000; - } else if (STREQ(def->model, "virtio")) { - *vendor = QEMU_PCI_VENDOR_REDHAT; - *product = QEMU_PCI_PRODUCT_NIC_VIRTIO; - } else { - VIR_INFO("Unexpected NIC model %s, cannot get PCI address", - def->model); - return -1; - } - return 0; -} - -static int -qemuGetPCIControllerVendorProduct(virDomainControllerDefPtr def, - unsigned *vendor, - unsigned *product) -{ - switch (def->type) { - case VIR_DOMAIN_CONTROLLER_TYPE_SCSI: - *vendor = QEMU_PCI_VENDOR_LSI_LOGIC; - *product = QEMU_PCI_PRODUCT_CONTROLLER_LSI; - break; - - case VIR_DOMAIN_CONTROLLER_TYPE_FDC: - /* XXX we could put in the ISA bridge address, but - that's not technically the FDC's address */ - return -1; - - case VIR_DOMAIN_CONTROLLER_TYPE_IDE: - *vendor = QEMU_PCI_VENDOR_INTEL; - *product = QEMU_PCI_PRODUCT_CONTROLLER_PIIX; - break; - - default: - VIR_INFO("Unexpected controller type %s, cannot get PCI address", - virDomainControllerTypeToString(def->type)); - return -1; - } - - return 0; -} - -static int -qemuGetPCIVideoVendorProduct(virDomainVideoDefPtr def, - unsigned *vendor, - unsigned *product) -{ - switch (def->type) { - case VIR_DOMAIN_VIDEO_TYPE_CIRRUS: - *vendor = QEMU_PCI_VENDOR_CIRRUS; - *product = QEMU_PCI_PRODUCT_VGA_CIRRUS; - break; - - case VIR_DOMAIN_VIDEO_TYPE_VGA: - *vendor = QEMU_PCI_VENDOR_QEMU; - *product = QEMU_PCI_PRODUCT_VGA_STDVGA; - break; - - case VIR_DOMAIN_VIDEO_TYPE_VMVGA: - *vendor = QEMU_PCI_VENDOR_VMWARE; - *product = QEMU_PCI_PRODUCT_VGA_VMWARE; - break; - - default: - return -1; - } - return 0; -} - -static int -qemuGetPCISoundVendorProduct(virDomainSoundDefPtr def, - unsigned *vendor, - unsigned *product) -{ - switch (def->model) { - case VIR_DOMAIN_SOUND_MODEL_ES1370: - *vendor = QEMU_PCI_VENDOR_ENSONIQ; - *product = QEMU_PCI_PRODUCT_AUDIO_ES1370; - break; - - case VIR_DOMAIN_SOUND_MODEL_AC97: - *vendor = QEMU_PCI_VENDOR_INTEL; - *product = QEMU_PCI_PRODUCT_AUDIO_AC97; - break; - - default: - return -1; - } - - return 0; -} - -static int -qemuGetPCIWatchdogVendorProduct(virDomainWatchdogDefPtr def, - unsigned *vendor, - unsigned *product) -{ - switch (def->model) { - case VIR_DOMAIN_WATCHDOG_MODEL_I6300ESB: - *vendor = QEMU_PCI_VENDOR_INTEL; - *product = QEMU_PCI_PRODUCT_WATCHDOG_I63000ESB; - break; - - default: - return -1; - } - - return 0; -} - - -static int -qemuGetPCIMemballoonVendorProduct(virDomainMemballoonDefPtr def, - unsigned *vendor, - unsigned *product) -{ - switch (def->model) { - case VIR_DOMAIN_MEMBALLOON_MODEL_VIRTIO: - *vendor = QEMU_PCI_VENDOR_REDHAT; - *product = QEMU_PCI_PRODUCT_BALLOON_VIRTIO; - break; - - default: 
- return -1; - } - - return 0; -} - - -/* - * This entire method assumes that PCI devices in 'info pci' - * match ordering of devices specified on the command line - * wrt to devices of matching vendor+product - * - * XXXX this might not be a valid assumption if we assign - * some static addrs on CLI. Have to check that... - */ -static int -qemuDetectPCIAddresses(virDomainObjPtr vm, - qemuMonitorPCIAddress *addrs, - int naddrs) -{ - unsigned int vendor = 0, product = 0; - int i; - - /* XXX should all these vendor/product IDs be kept in the - * actual device data structure instead ? - */ - - for (i = 0 ; i < vm->def->ndisks ; i++) { - if (qemuGetPCIDiskVendorProduct(vm->def->disks[i], &vendor, &product) < 0) - continue; - - if (qemuAssignNextPCIAddress(&(vm->def->disks[i]->info), - vendor, product, - addrs, naddrs) < 0) { - qemuReportError(VIR_ERR_INTERNAL_ERROR, - _("cannot find PCI address for VirtIO disk %s"), - vm->def->disks[i]->dst); - return -1; - } - } - - for (i = 0 ; i < vm->def->nnets ; i++) { - if (qemuGetPCINetVendorProduct(vm->def->nets[i], &vendor, &product) < 0) - continue; - - if (qemuAssignNextPCIAddress(&(vm->def->nets[i]->info), - vendor, product, - addrs, naddrs) < 0) { - qemuReportError(VIR_ERR_INTERNAL_ERROR, - _("cannot find PCI address for %s NIC"), - vm->def->nets[i]->model); - return -1; - } - } - - for (i = 0 ; i < vm->def->ncontrollers ; i++) { - if (qemuGetPCIControllerVendorProduct(vm->def->controllers[i], &vendor, &product) < 0) - continue; - - if (qemuAssignNextPCIAddress(&(vm->def->controllers[i]->info), - vendor, product, - addrs, naddrs) < 0) { - qemuReportError(VIR_ERR_INTERNAL_ERROR, - _("cannot find PCI address for controller %s"), - virDomainControllerTypeToString(vm->def->controllers[i]->type)); - return -1; - } - } - - for (i = 0 ; i < vm->def->nvideos ; i++) { - if (qemuGetPCIVideoVendorProduct(vm->def->videos[i], &vendor, &product) < 0) - continue; - - if (qemuAssignNextPCIAddress(&(vm->def->videos[i]->info), - vendor, product, - addrs, naddrs) < 0) { - qemuReportError(VIR_ERR_INTERNAL_ERROR, - _("cannot find PCI address for video adapter %s"), - virDomainVideoTypeToString(vm->def->videos[i]->type)); - return -1; - } - } - - for (i = 0 ; i < vm->def->nsounds ; i++) { - if (qemuGetPCISoundVendorProduct(vm->def->sounds[i], &vendor, &product) < 0) - continue; - - if (qemuAssignNextPCIAddress(&(vm->def->sounds[i]->info), - vendor, product, - addrs, naddrs) < 0) { - qemuReportError(VIR_ERR_INTERNAL_ERROR, - _("cannot find PCI address for sound adapter %s"), - virDomainSoundModelTypeToString(vm->def->sounds[i]->model)); - return -1; - } - } - - - if (vm->def->watchdog && - qemuGetPCIWatchdogVendorProduct(vm->def->watchdog, &vendor, &product) == 0) { - if (qemuAssignNextPCIAddress(&(vm->def->watchdog->info), - vendor, product, - addrs, naddrs) < 0) { - qemuReportError(VIR_ERR_INTERNAL_ERROR, - _("cannot find PCI address for watchdog %s"), - virDomainWatchdogModelTypeToString(vm->def->watchdog->model)); - return -1; - } - } - - if (vm->def->memballoon && - qemuGetPCIMemballoonVendorProduct(vm->def->memballoon, &vendor, &product) == 0) { - if (qemuAssignNextPCIAddress(&(vm->def->memballoon->info), - vendor, product, - addrs, naddrs) < 0) { - qemuReportError(VIR_ERR_INTERNAL_ERROR, - _("cannot find PCI address for balloon %s"), - virDomainMemballoonModelTypeToString(vm->def->memballoon->model)); - return -1; - } - } - - /* XXX console (virtio) */ - - - /* ... and now things we don't have in our xml */ - - /* XXX USB controller ? 
*/ - - /* XXX what about other PCI devices (ie bridges) */ - - return 0; -} - -static int -qemuInitPCIAddresses(struct qemud_driver *driver, - virDomainObjPtr vm) -{ - qemuDomainObjPrivatePtr priv = vm->privateData; - int naddrs; - int ret; - qemuMonitorPCIAddress *addrs = NULL; - - qemuDomainObjEnterMonitorWithDriver(driver, vm); - naddrs = qemuMonitorGetAllPCIAddresses(priv->mon, - &addrs); - qemuDomainObjExitMonitorWithDriver(driver, vm); - - ret = qemuDetectPCIAddresses(vm, addrs, naddrs); - - VIR_FREE(addrs); - - return ret; -} - - -static int qemudNextFreePort(struct qemud_driver *driver, - int startPort) { - int i; - - for (i = startPort ; i < QEMU_VNC_PORT_MAX; i++) { - int fd; - int reuse = 1; - struct sockaddr_in addr; - bool used = false; - - if (virBitmapGetBit(driver->reservedVNCPorts, - i - QEMU_VNC_PORT_MIN, &used) < 0) - VIR_DEBUG("virBitmapGetBit failed on bit %d", i - QEMU_VNC_PORT_MIN); - - if (used) - continue; - - addr.sin_family = AF_INET; - addr.sin_port = htons(i); - addr.sin_addr.s_addr = htonl(INADDR_ANY); - fd = socket(PF_INET, SOCK_STREAM, 0); - if (fd < 0) - return -1; - - if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, (void*)&reuse, sizeof(reuse)) < 0) { - VIR_FORCE_CLOSE(fd); - break; - } - - if (bind(fd, (struct sockaddr*)&addr, sizeof(addr)) == 0) { - /* Not in use, lets grab it */ - VIR_FORCE_CLOSE(fd); - /* Add port to bitmap of reserved ports */ - if (virBitmapSetBit(driver->reservedVNCPorts, - i - QEMU_VNC_PORT_MIN) < 0) { - VIR_DEBUG("virBitmapSetBit failed on bit %d", - i - QEMU_VNC_PORT_MIN); - } - return i; - } - VIR_FORCE_CLOSE(fd); + if ((qemu_driver->activePciHostdevs = pciDeviceListNew()) == NULL) + goto error; - if (errno == EADDRINUSE) { - /* In use, try next */ - continue; + if (privileged) { + if (chown(qemu_driver->libDir, qemu_driver->user, qemu_driver->group) < 0) { + virReportSystemError(errno, + _("unable to set ownership of '%s' to user %d:%d"), + qemu_driver->libDir, qemu_driver->user, qemu_driver->group); + goto error; } - /* Some other bad failure, get out.. 
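The probe-by-bind() idea, stripped of the driver's reservation bitmap (standalone sketch; unlike the code above it simply tries the next port on any bind() failure, and of course a port is only known free at the instant of the test):

#include <arpa/inet.h>
#include <netinet/in.h>
#include <stdio.h>
#include <string.h>
#include <sys/socket.h>
#include <unistd.h>

/* Return the first port in [start, end) that bind() accepts, or -1. */
static int
find_free_port(int start, int end)
{
    int port;

    for (port = start; port < end; port++) {
        struct sockaddr_in addr;
        int reuse = 1;
        int fd = socket(AF_INET, SOCK_STREAM, 0);

        if (fd < 0)
            return -1;
        setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &reuse, sizeof(reuse));

        memset(&addr, 0, sizeof(addr));
        addr.sin_family = AF_INET;
        addr.sin_port = htons(port);
        addr.sin_addr.s_addr = htonl(INADDR_ANY);

        if (bind(fd, (struct sockaddr *)&addr, sizeof(addr)) == 0) {
            close(fd);
            return port;   /* free right now; caller must reserve it */
        }
        close(fd);         /* EADDRINUSE or otherwise: try the next */
    }
    return -1;
}

int main(void)
{
    printf("free port in VNC range: %d\n", find_free_port(5900, 65536));
    return 0;
}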
*/ - break; - } - return -1; -} - - -static void -qemuReturnPort(struct qemud_driver *driver, - int port) -{ - if (port < QEMU_VNC_PORT_MIN) - return; - - if (virBitmapClearBit(driver->reservedVNCPorts, - port - QEMU_VNC_PORT_MIN) < 0) - VIR_DEBUG("Could not mark port %d as unused", port); -} - - -static int -qemuAssignPCIAddresses(virDomainDefPtr def) -{ - int ret = -1; - unsigned long long qemuCmdFlags = 0; - qemuDomainPCIAddressSetPtr addrs = NULL; - - if (qemuCapsExtractVersionInfo(def->emulator, - NULL, - &qemuCmdFlags) < 0) - goto cleanup; - - if (qemuCmdFlags & QEMUD_CMD_FLAG_DEVICE) { - if (!(addrs = qemuDomainPCIAddressSetCreate(def))) - goto cleanup; - - if (qemuAssignDevicePCISlots(def, addrs) < 0) - goto cleanup; - } - - ret = 0; - -cleanup: - qemuDomainPCIAddressSetFree(addrs); - - return ret; -} - - -static int -qemuPrepareChardevDevice(virDomainDefPtr def ATTRIBUTE_UNUSED, - virDomainChrDefPtr dev, - void *opaque ATTRIBUTE_UNUSED) -{ - int fd; - if (dev->source.type != VIR_DOMAIN_CHR_TYPE_FILE) - return 0; - - if ((fd = open(dev->source.data.file.path, - O_CREAT | O_APPEND, S_IRUSR|S_IWUSR)) < 0) { - virReportSystemError(errno, - _("Unable to pre-create chardev file '%s'"), - dev->source.data.file.path); - return -1; - } - - VIR_FORCE_CLOSE(fd); - - return 0; -} - - -struct qemudHookData { - virConnectPtr conn; - virDomainObjPtr vm; - struct qemud_driver *driver; -}; - -static int qemudSecurityHook(void *data) { - struct qemudHookData *h = data; - - /* This must take place before exec(), so that all QEMU - * memory allocation is on the correct NUMA node - */ - if (qemuAddToCgroup(h->driver, h->vm->def) < 0) - return -1; - - /* This must be done after cgroup placement to avoid resetting CPU - * affinity */ - if (qemudInitCpuAffinity(h->vm) < 0) - return -1; - - if (virSecurityManagerSetProcessLabel(h->driver->securityManager, h->vm) < 0) - return -1; - - return 0; -} - -static int -qemuPrepareMonitorChr(struct qemud_driver *driver, - virDomainChrSourceDefPtr monConfig, - const char *vm) -{ - monConfig->type = VIR_DOMAIN_CHR_TYPE_UNIX; - monConfig->data.nix.listen = true; - - if (virAsprintf(&monConfig->data.nix.path, "%s/%s.monitor", - driver->libDir, vm) < 0) { - virReportOOMError(); - return -1; - } - - return 0; -} - -static int qemuDomainSnapshotSetCurrentActive(virDomainObjPtr vm, - char *snapshotDir); -static int qemuDomainSnapshotSetCurrentInactive(virDomainObjPtr vm, - char *snapshotDir); - - -#define START_POSTFIX ": starting up\n" -#define SHUTDOWN_POSTFIX ": shutting down\n" - -static int qemudStartVMDaemon(virConnectPtr conn, - struct qemud_driver *driver, - virDomainObjPtr vm, - const char *migrateFrom, - bool start_paused, - int stdin_fd, - const char *stdin_path, - enum virVMOperationType vmop) { - int ret; - unsigned long long qemuCmdFlags; - off_t pos = -1; - char ebuf[1024]; - char *pidfile = NULL; - int logfile = -1; - char *timestamp; - qemuDomainObjPrivatePtr priv = vm->privateData; - virCommandPtr cmd = NULL; - - struct qemudHookData hookData; - hookData.conn = conn; - hookData.vm = vm; - hookData.driver = driver; - - DEBUG0("Beginning VM startup process"); - - if (virDomainObjIsActive(vm)) { - qemuReportError(VIR_ERR_OPERATION_INVALID, - "%s", _("VM is already active")); - return -1; - } - - /* Do this upfront, so any part of the startup process can add - * runtime state to vm->def that won't be persisted. 
This lets us - * report implicit runtime defaults in the XML, like vnc listen/socket - */ - DEBUG0("Setting current domain def as transient"); - if (virDomainObjSetDefTransient(driver->caps, vm, true) < 0) - goto cleanup; - - /* Must be run before security labelling */ - DEBUG0("Preparing host devices"); - if (qemuPrepareHostDevices(driver, vm->def) < 0) - goto cleanup; - - DEBUG0("Preparing chr devices"); - if (virDomainChrDefForeach(vm->def, - true, - qemuPrepareChardevDevice, - NULL) < 0) - goto cleanup; - - /* If you are using a SecurityDriver with dynamic labelling, - then generate a security label for isolation */ - DEBUG0("Generating domain security label (if required)"); - if (virSecurityManagerGenLabel(driver->securityManager, vm) < 0) { - qemuDomainSecurityLabelAudit(vm, false); - goto cleanup; - } - qemuDomainSecurityLabelAudit(vm, true); - - DEBUG0("Setting domain security labels (if required)"); - if (virSecurityManagerSetAllLabel(driver->securityManager, - vm, stdin_path) < 0) - goto cleanup; - - if (stdin_fd != -1) { - /* if there's an fd to migrate from, and it's a pipe, put the - * proper security label on it - */ - struct stat stdin_sb; - - DEBUG0("setting security label on pipe used for migration"); - - if (fstat(stdin_fd, &stdin_sb) < 0) { + if (chown(qemu_driver->cacheDir, qemu_driver->user, qemu_driver->group) < 0) { virReportSystemError(errno, - _("cannot stat fd %d"), stdin_fd); - goto cleanup; + _("unable to set ownership of '%s' to %d:%d"), + qemu_driver->cacheDir, qemu_driver->user, qemu_driver->group); + goto error; } - if (S_ISFIFO(stdin_sb.st_mode) && - virSecurityManagerSetFDLabel(driver->securityManager, vm, stdin_fd) < 0) - goto cleanup; - } - - /* Ensure no historical cgroup for this VM is lying around with bogus - * settings */ - DEBUG0("Ensuring no historical cgroup is lying around"); - qemuRemoveCgroup(driver, vm, 1); - - if (vm->def->ngraphics == 1) { - if (vm->def->graphics[0]->type == VIR_DOMAIN_GRAPHICS_TYPE_VNC && - !vm->def->graphics[0]->data.vnc.socket && - vm->def->graphics[0]->data.vnc.autoport) { - int port = qemudNextFreePort(driver, QEMU_VNC_PORT_MIN); - if (port < 0) { - qemuReportError(VIR_ERR_INTERNAL_ERROR, - "%s", _("Unable to find an unused VNC port")); - goto cleanup; - } - vm->def->graphics[0]->data.vnc.port = port; - } else if (vm->def->graphics[0]->type == VIR_DOMAIN_GRAPHICS_TYPE_SPICE && - vm->def->graphics[0]->data.spice.autoport) { - int port = qemudNextFreePort(driver, QEMU_VNC_PORT_MIN); - int tlsPort = -1; - if (port < 0) { - qemuReportError(VIR_ERR_INTERNAL_ERROR, - "%s", _("Unable to find an unused SPICE port")); - goto cleanup; - } - - if (driver->spiceTLS) { - tlsPort = qemudNextFreePort(driver, port + 1); - if (tlsPort < 0) { - qemuReportError(VIR_ERR_INTERNAL_ERROR, - "%s", _("Unable to find an unused SPICE TLS port")); - qemuReturnPort(driver, port); - goto cleanup; - } - } - - vm->def->graphics[0]->data.spice.port = port; - vm->def->graphics[0]->data.spice.tlsPort = tlsPort; + if (chown(qemu_driver->saveDir, qemu_driver->user, qemu_driver->group) < 0) { + virReportSystemError(errno, + _("unable to set ownership of '%s' to %d:%d"), + qemu_driver->saveDir, qemu_driver->user, qemu_driver->group); + goto error; + } + if (chown(qemu_driver->snapshotDir, qemu_driver->user, qemu_driver->group) < 0) { + virReportSystemError(errno, + _("unable to set ownership of '%s' to %d:%d"), + qemu_driver->snapshotDir, qemu_driver->user, qemu_driver->group); + goto error; } } - if (virFileMakePath(driver->logDir) != 0) { -
virReportSystemError(errno, - _("cannot create log directory %s"), - driver->logDir); - goto cleanup; - } - - DEBUG0("Creating domain log file"); - if ((logfile = qemudLogFD(driver, vm->def->name, false)) < 0) - goto cleanup; - - DEBUG0("Determining emulator version"); - if (qemuCapsExtractVersionInfo(vm->def->emulator, - NULL, - &qemuCmdFlags) < 0) - goto cleanup; - - DEBUG0("Setting up domain cgroup (if required)"); - if (qemuSetupCgroup(driver, vm) < 0) - goto cleanup; - - if (VIR_ALLOC(priv->monConfig) < 0) { - virReportOOMError(); - goto cleanup; - } - - DEBUG0("Preparing monitor state"); - if (qemuPrepareMonitorChr(driver, priv->monConfig, vm->def->name) < 0) - goto cleanup; - -#if HAVE_YAJL - if (qemuCmdFlags & QEMUD_CMD_FLAG_MONITOR_JSON) - priv->monJSON = 1; - else -#endif - priv->monJSON = 0; - - priv->monitor_warned = 0; - priv->gotShutdown = false; - - if ((ret = virFileDeletePid(driver->stateDir, vm->def->name)) != 0) { - virReportSystemError(ret, - _("Cannot remove stale PID file for %s"), - vm->def->name); - goto cleanup; - } - - if (!(pidfile = virFilePid(driver->stateDir, vm->def->name))) { - virReportSystemError(errno, - "%s", _("Failed to build pidfile path.")); - goto cleanup; - } - - /* - * Normally PCI addresses are assigned in the virDomainCreate - * or virDomainDefine methods. We might still need to assign - * some here to cope with the question of upgrades. Regardless - * we also need to populate the PCi address set cache for later - * use in hotplug + /* If hugetlbfs is present, then we need to create a sub-directory within + * it, since we can't assume the root mount point has permissions that + * will let our spawned QEMU instances use it. + * + * NB the check for '/', since user may config "" to disable hugepages + * even when mounted */ - if (qemuCmdFlags & QEMUD_CMD_FLAG_DEVICE) { - DEBUG0("Assigning domain PCI addresses"); - /* Populate cache with current addresses */ - if (priv->pciaddrs) { - qemuDomainPCIAddressSetFree(priv->pciaddrs); - priv->pciaddrs = NULL; - } - if (!(priv->pciaddrs = qemuDomainPCIAddressSetCreate(vm->def))) - goto cleanup; - - - /* Assign any remaining addresses */ - if (qemuAssignDevicePCISlots(vm->def, priv->pciaddrs) < 0) - goto cleanup; - - priv->persistentAddrs = 1; - } else { - priv->persistentAddrs = 0; - } - - DEBUG0("Building emulator command line"); - vm->def->id = driver->nextvmid++; - if (!(cmd = qemuBuildCommandLine(conn, driver, vm->def, priv->monConfig, - priv->monJSON != 0, qemuCmdFlags, - migrateFrom, stdin_fd, - vm->current_snapshot, vmop))) - goto cleanup; - - if (qemuDomainSnapshotSetCurrentInactive(vm, driver->snapshotDir) < 0) - goto cleanup; - - /* now that we know it is about to start call the hook if present */ - if (virHookPresent(VIR_HOOK_DRIVER_QEMU)) { - char *xml = virDomainDefFormat(vm->def, 0); - int hookret; - - hookret = virHookCall(VIR_HOOK_DRIVER_QEMU, vm->def->name, - VIR_HOOK_QEMU_OP_START, VIR_HOOK_SUBOP_BEGIN, NULL, xml); - VIR_FREE(xml); - - /* - * If the script raised an error abort the launch - */ - if (hookret < 0) - goto cleanup; - } + if (qemu_driver->hugetlbfs_mount && + qemu_driver->hugetlbfs_mount[0] == '/') { + char *mempath = NULL; + if (virAsprintf(&mempath, "%s/libvirt/qemu", qemu_driver->hugetlbfs_mount) < 0) + goto out_of_memory; - if ((timestamp = virTimestamp()) == NULL) { - virReportOOMError(); - goto cleanup; - } else { - if (safewrite(logfile, timestamp, strlen(timestamp)) < 0 || - safewrite(logfile, START_POSTFIX, strlen(START_POSTFIX)) < 0) { - VIR_WARN("Unable to write 
timestamp to logfile: %s", - virStrerror(errno, ebuf, sizeof ebuf)); + if ((rc = virFileMakePath(mempath)) != 0) { + virReportSystemError(rc, + _("unable to create hugepage path %s"), mempath); + VIR_FREE(mempath); + goto error; } - - VIR_FREE(timestamp); - } - - virCommandWriteArgLog(cmd, logfile); - - if ((pos = lseek(logfile, 0, SEEK_END)) < 0) - VIR_WARN("Unable to seek to end of logfile: %s", - virStrerror(errno, ebuf, sizeof ebuf)); - - VIR_DEBUG("Clear emulator capabilities: %d", - driver->clearEmulatorCapabilities); - if (driver->clearEmulatorCapabilities) - virCommandClearCaps(cmd); - - virCommandSetPreExecHook(cmd, qemudSecurityHook, &hookData); - - virCommandSetOutputFD(cmd, &logfile); - virCommandSetErrorFD(cmd, &logfile); - virCommandNonblockingFDs(cmd); - virCommandSetPidFile(cmd, pidfile); - virCommandDaemonize(cmd); - - ret = virCommandRun(cmd, NULL); - VIR_FREE(pidfile); - - /* wait for qemu process to show up */ - if (ret == 0) { - if (virFileReadPid(driver->stateDir, vm->def->name, &vm->pid)) { - qemuReportError(VIR_ERR_INTERNAL_ERROR, - _("Domain %s didn't show up"), vm->def->name); - ret = -1; + if (qemu_driver->privileged && + chown(mempath, qemu_driver->user, qemu_driver->group) < 0) { + virReportSystemError(errno, + _("unable to set ownership on %s to %d:%d"), + mempath, qemu_driver->user, qemu_driver->group); + VIR_FREE(mempath); + goto error; } -#if 0 - } else if (ret == -2) { - /* - * XXX this is bogus. It isn't safe to set vm->pid = child - * because the child no longer exists. - */ - /* The virExec process that launches the daemon failed. Depending on - * when it failed (we can't determine for sure), there may be - * extra info in the domain log (if the hook failed for example). - * - * Pretend like things succeeded, and let 'WaitForMonitor' report - * the log contents for us. - */ - vm->pid = child; - ret = 0; -#endif + qemu_driver->hugepage_path = mempath; } - if (migrateFrom) - start_paused = true; - vm->state = start_paused ? VIR_DOMAIN_PAUSED : VIR_DOMAIN_RUNNING; - - if (ret == -1) /* The VM failed to start; tear filters before taps */ - virDomainConfVMNWFilterTeardown(vm); - - if (ret == -1) /* The VM failed to start */ - goto cleanup; + /* Get all the running persistent or transient configs first */ + if (virDomainLoadAllConfigs(qemu_driver->caps, + &qemu_driver->domains, + qemu_driver->stateDir, + NULL, + 1, NULL, NULL) < 0) + goto error; - DEBUG0("Waiting for monitor to show up"); - if (qemudWaitForMonitor(driver, vm, pos) < 0) - goto cleanup; + conn = virConnectOpen(qemu_driver->privileged ? + "qemu:///system" : + "qemu:///session"); - DEBUG0("Detecting VCPU PIDs"); - if (qemuDetectVcpuPIDs(driver, vm) < 0) - goto cleanup; + qemuProcessReconnectAll(conn, qemu_driver); - DEBUG0("Setting any required VM passwords"); - if (qemuInitPasswords(conn, driver, vm, qemuCmdFlags) < 0) - goto cleanup; + /* Then inactive persistent configs */ + if (virDomainLoadAllConfigs(qemu_driver->caps, + &qemu_driver->domains, + qemu_driver->configDir, + qemu_driver->autostartDir, + 0, NULL, NULL) < 0) + goto error; - /* If we have -device, then addresses are assigned explicitly.
- * If not, then we have to detect dynamic ones here */ - if (!(qemuCmdFlags & QEMUD_CMD_FLAG_DEVICE)) { - DEBUG0("Determining domain device PCI addresses"); - if (qemuInitPCIAddresses(driver, vm) < 0) - goto cleanup; - } - DEBUG0("Setting initial memory amount"); - qemuDomainObjEnterMonitorWithDriver(driver, vm); - if (qemuMonitorSetBalloon(priv->mon, vm->def->mem.cur_balloon) < 0) { - qemuDomainObjExitMonitorWithDriver(driver, vm); - goto cleanup; - } - qemuDomainObjExitMonitorWithDriver(driver, vm); + virHashForEach(qemu_driver->domains.objs, qemuDomainSnapshotLoad, + qemu_driver->snapshotDir); - if (!start_paused) { - DEBUG0("Starting domain CPUs"); - /* Allow the CPUS to start executing */ - if (doStartCPUs(driver, vm, conn) < 0) { - if (virGetLastError() == NULL) - qemuReportError(VIR_ERR_INTERNAL_ERROR, - "%s", _("resume operation failed")); - goto cleanup; - } - } + qemuDriverUnlock(qemu_driver); + qemuAutostartDomains(qemu_driver); - DEBUG0("Writing domain status to disk"); - if (virDomainSaveStatus(driver->caps, driver->stateDir, vm) < 0) - goto cleanup; + qemu_driver->workerPool = virThreadPoolNew(0, 1, processWatchdogEvent, qemu_driver); + if (!qemu_driver->workerPool) + goto error; - virCommandFree(cmd); - VIR_FORCE_CLOSE(logfile); + if (conn) + virConnectClose(conn); return 0; -cleanup: - /* We jump here if we failed to start the VM for any reason, or - * if we failed to initialize the now running VM. kill it off and - * pretend we never started it */ - virCommandFree(cmd); - VIR_FORCE_CLOSE(logfile); - qemudShutdownVMDaemon(driver, vm, 0); - +out_of_memory: + virReportOOMError(); +error: + if (qemu_driver) + qemuDriverUnlock(qemu_driver); + if (conn) + virConnectClose(conn); + VIR_FREE(base); + VIR_FREE(driverConf); + qemudShutdown(); return -1; } -static void qemudShutdownVMDaemon(struct qemud_driver *driver, - virDomainObjPtr vm, - int migrated) { - int ret; - int retries = 0; - qemuDomainObjPrivatePtr priv = vm->privateData; - virErrorPtr orig_err; - virDomainDefPtr def; - int i; - int logfile = -1; - char *timestamp; - char ebuf[1024]; - - VIR_DEBUG("Shutting down VM '%s' pid=%d migrated=%d", - vm->def->name, vm->pid, migrated); +static void qemudNotifyLoadDomain(virDomainObjPtr vm, int newVM, void *opaque) +{ + struct qemud_driver *driver = opaque; - if (!virDomainObjIsActive(vm)) { - VIR_DEBUG("VM '%s' not active", vm->def->name); - return; + if (newVM) { + virDomainEventPtr event = + virDomainEventNewFromObj(vm, + VIR_DOMAIN_EVENT_DEFINED, + VIR_DOMAIN_EVENT_DEFINED_ADDED); + if (event) + qemuDomainEventQueue(driver, event); } +} - if ((logfile = qemudLogFD(driver, vm->def->name, true)) < 0) { - /* To not break the normal domain shutdown process, skip the - * timestamp log writing if failed on opening log file. 
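The start/shutdown markers themselves are just a timestamp plus the START_POSTFIX / SHUTDOWN_POSTFIX string appended to the per-domain log. A stdio-based equivalent (sketch only; the driver uses virTimestamp() and safewrite() rather than these calls):

#include <stdio.h>
#include <time.h>

static int
log_marker(FILE *log, const char *postfix)
{
    char stamp[64];
    time_t now = time(NULL);
    struct tm tm;

    if (!localtime_r(&now, &tm) ||
        strftime(stamp, sizeof(stamp), "%Y-%m-%d %H:%M:%S", &tm) == 0)
        return -1;
    return fprintf(log, "%s%s", stamp, postfix) < 0 ? -1 : 0;
}

int main(void)
{
    /* would be the domain logfile in real code */
    return log_marker(stdout, ": shutting down\n") < 0;
}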
*/ - VIR_WARN("Unable to open logfile: %s", - virStrerror(errno, ebuf, sizeof ebuf)); - } else { - if ((timestamp = virTimestamp()) == NULL) { - virReportOOMError(); - } else { - if (safewrite(logfile, timestamp, strlen(timestamp)) < 0 || - safewrite(logfile, SHUTDOWN_POSTFIX, - strlen(SHUTDOWN_POSTFIX)) < 0) { - VIR_WARN("Unable to write timestamp to logfile: %s", - virStrerror(errno, ebuf, sizeof ebuf)); - } - - VIR_FREE(timestamp); - } +/** + * qemudReload: + * + * Function to restart the QEmu daemon, it will recheck the configuration + * files and update its state and the networking + */ +static int +qemudReload(void) { + if (!qemu_driver) + return 0; - if (VIR_CLOSE(logfile) < 0) - VIR_WARN("Unable to close logfile: %s", - virStrerror(errno, ebuf, sizeof ebuf)); - } + qemuDriverLock(qemu_driver); + virDomainLoadAllConfigs(qemu_driver->caps, + &qemu_driver->domains, + qemu_driver->configDir, + qemu_driver->autostartDir, + 0, qemudNotifyLoadDomain, qemu_driver); + qemuDriverUnlock(qemu_driver); - /* This method is routinely used in clean up paths. Disable error - * reporting so we don't squash a legit error. */ - orig_err = virSaveLastError(); + qemuAutostartDomains(qemu_driver); - virDomainConfVMNWFilterTeardown(vm); + return 0; +} - if (driver->macFilter) { - def = vm->def; - for (i = 0 ; i < def->nnets ; i++) { - virDomainNetDefPtr net = def->nets[i]; - if (net->ifname == NULL) - continue; - if ((errno = networkDisallowMacOnPort(driver, net->ifname, - net->mac))) { - virReportSystemError(errno, - _("failed to remove ebtables rule to allow MAC address on '%s'"), - net->ifname); - } - } - } +/** + * qemudActive: + * + * Checks if the QEmu daemon is active, i.e. has an active domain or + * an active network + * + * Returns 1 if active, 0 otherwise + */ +static int +qemudActive(void) { + int active = 0; - /* This will safely handle a non-running guest with pid=0 or pid=-1*/ - if (virKillProcess(vm->pid, 0) == 0 && - virKillProcess(vm->pid, SIGTERM) < 0) - virReportSystemError(errno, - _("Failed to send SIGTERM to %s (%d)"), - vm->def->name, vm->pid); + if (!qemu_driver) + return 0; - if (priv->mon) - qemuMonitorClose(priv->mon); + /* XXX having to iterate here is not great because it requires many locks */ + qemuDriverLock(qemu_driver); + active = virDomainObjListNumOfDomains(&qemu_driver->domains, 1); + qemuDriverUnlock(qemu_driver); + return active; +} - if (priv->monConfig) { - if (priv->monConfig->type == VIR_DOMAIN_CHR_TYPE_UNIX) - unlink(priv->monConfig->data.nix.path); - virDomainChrSourceDefFree(priv->monConfig); - priv->monConfig = NULL; - } +/** + * qemudShutdown: + * + * Shutdown the QEmu daemon, it will stop all active domains and networks + */ +static int +qemudShutdown(void) { + int i; - /* shut it off for sure */ - virKillProcess(vm->pid, SIGKILL); + if (!qemu_driver) + return -1; - /* now that we know it's stopped call the hook if present */ - if (virHookPresent(VIR_HOOK_DRIVER_QEMU)) { - char *xml = virDomainDefFormat(vm->def, 0); + qemuDriverLock(qemu_driver); + pciDeviceListFree(qemu_driver->activePciHostdevs); + virCapabilitiesFree(qemu_driver->caps); - /* we can't stop the operation even if the script raised an error */ - virHookCall(VIR_HOOK_DRIVER_QEMU, vm->def->name, - VIR_HOOK_QEMU_OP_STOPPED, VIR_HOOK_SUBOP_END, NULL, xml); - VIR_FREE(xml); - } + virDomainObjListDeinit(&qemu_driver->domains); + virBitmapFree(qemu_driver->reservedVNCPorts); - /* Reset Security Labels */ - virSecurityManagerRestoreAllLabel(driver->securityManager, - vm, migrated); - 
virSecurityManagerReleaseLabel(driver->securityManager, vm); + virSysinfoDefFree(qemu_driver->hostsysinfo); - /* Clear out dynamically assigned labels */ - if (vm->def->seclabel.type == VIR_DOMAIN_SECLABEL_DYNAMIC) { - VIR_FREE(vm->def->seclabel.model); - VIR_FREE(vm->def->seclabel.label); - VIR_FREE(vm->def->seclabel.imagelabel); - } + VIR_FREE(qemu_driver->configDir); + VIR_FREE(qemu_driver->autostartDir); + VIR_FREE(qemu_driver->logDir); + VIR_FREE(qemu_driver->stateDir); + VIR_FREE(qemu_driver->libDir); + VIR_FREE(qemu_driver->cacheDir); + VIR_FREE(qemu_driver->saveDir); + VIR_FREE(qemu_driver->snapshotDir); + VIR_FREE(qemu_driver->autoDumpPath); + VIR_FREE(qemu_driver->vncTLSx509certdir); + VIR_FREE(qemu_driver->vncListen); + VIR_FREE(qemu_driver->vncPassword); + VIR_FREE(qemu_driver->vncSASLdir); + VIR_FREE(qemu_driver->spiceTLSx509certdir); + VIR_FREE(qemu_driver->spiceListen); + VIR_FREE(qemu_driver->spicePassword); + VIR_FREE(qemu_driver->hugetlbfs_mount); + VIR_FREE(qemu_driver->hugepage_path); + VIR_FREE(qemu_driver->saveImageFormat); + VIR_FREE(qemu_driver->dumpImageFormat); - virDomainDefClearDeviceAliases(vm->def); - if (!priv->persistentAddrs) { - virDomainDefClearPCIAddresses(vm->def); - qemuDomainPCIAddressSetFree(priv->pciaddrs); - priv->pciaddrs = NULL; - } + virSecurityManagerFree(qemu_driver->securityManager); - qemuDomainReAttachHostDevices(driver, vm->def); + ebtablesContextFree(qemu_driver->ebtables); -#if WITH_MACVTAP - def = vm->def; - for (i = 0; i < def->nnets; i++) { - virDomainNetDefPtr net = def->nets[i]; - if (net->type == VIR_DOMAIN_NET_TYPE_DIRECT) { - delMacvtap(net->ifname, net->mac, net->data.direct.linkdev, - &net->data.direct.virtPortProfile); - VIR_FREE(net->ifname); - } + if (qemu_driver->cgroupDeviceACL) { + for (i = 0 ; qemu_driver->cgroupDeviceACL[i] != NULL ; i++) + VIR_FREE(qemu_driver->cgroupDeviceACL[i]); + VIR_FREE(qemu_driver->cgroupDeviceACL); } -#endif -retry: - if ((ret = qemuRemoveCgroup(driver, vm, 0)) < 0) { - if (ret == -EBUSY && (retries++ < 5)) { - usleep(200*1000); - goto retry; - } - VIR_WARN("Failed to remove cgroup for %s", - vm->def->name); - } + /* Free domain callback list */ + virDomainEventCallbackListFree(qemu_driver->domainEventCallbacks); + virDomainEventQueueFree(qemu_driver->domainEventQueue); - qemudRemoveDomainStatus(driver, vm); + if (qemu_driver->domainEventTimer != -1) + virEventRemoveTimeout(qemu_driver->domainEventTimer); - /* Remove VNC port from port reservation bitmap, but only if it was - reserved by the driver (autoport=yes) - */ - if ((vm->def->ngraphics == 1) && - vm->def->graphics[0]->type == VIR_DOMAIN_GRAPHICS_TYPE_VNC && - vm->def->graphics[0]->data.vnc.autoport) { - qemuReturnPort(driver, vm->def->graphics[0]->data.vnc.port); - } - if ((vm->def->ngraphics == 1) && - vm->def->graphics[0]->type == VIR_DOMAIN_GRAPHICS_TYPE_SPICE && - vm->def->graphics[0]->data.spice.autoport) { - qemuReturnPort(driver, vm->def->graphics[0]->data.spice.port); - qemuReturnPort(driver, vm->def->graphics[0]->data.spice.tlsPort); - } + if (qemu_driver->brctl) + brShutdown(qemu_driver->brctl); - vm->pid = -1; - vm->def->id = -1; - vm->state = VIR_DOMAIN_SHUTOFF; - VIR_FREE(priv->vcpupids); - priv->nvcpupids = 0; + virCgroupFree(&qemu_driver->cgroup); - if (vm->newDef) { - virDomainDefFree(vm->def); - vm->def = vm->newDef; - vm->def->id = -1; - vm->newDef = NULL; - } + qemuDriverUnlock(qemu_driver); + virMutexDestroy(&qemu_driver->lock); + virThreadPoolFree(qemu_driver->workerPool); + VIR_FREE(qemu_driver); - if (orig_err) { 
- virSetError(orig_err); - virFreeError(orig_err); - } + return 0; } + +static int qemuDomainSnapshotSetCurrentActive(virDomainObjPtr vm, + char *snapshotDir); +static int qemuDomainSnapshotSetCurrentInactive(virDomainObjPtr vm, + char *snapshotDir); + + static virDrvOpenStatus qemudOpen(virConnectPtr conn, virConnectAuthPtr auth ATTRIBUTE_UNUSED, int flags ATTRIBUTE_UNUSED) { @@ -3603,7 +1238,7 @@ static virDomainPtr qemudDomainCreate(virConnectPtr conn, const char *xml, if (qemudCanonicalizeMachine(driver, def) < 0) goto cleanup; - if (qemuAssignPCIAddresses(def) < 0) + if (qemuDomainAssignPCIAddresses(def) < 0) goto cleanup; if (!(vm = virDomainAssignDef(driver->caps, @@ -3616,9 +1251,9 @@ static virDomainPtr qemudDomainCreate(virConnectPtr conn, const char *xml, if (qemuDomainObjBeginJobWithDriver(driver, vm) < 0) goto cleanup; /* XXXX free the 'vm' we created ? */ - if (qemudStartVMDaemon(conn, driver, vm, NULL, - (flags & VIR_DOMAIN_START_PAUSED) != 0, - -1, NULL, VIR_VM_OP_CREATE) < 0) { + if (qemuProcessStart(conn, driver, vm, NULL, + (flags & VIR_DOMAIN_START_PAUSED) != 0, + -1, NULL, VIR_VM_OP_CREATE) < 0) { qemuDomainStartAudit(vm, "booted", false); if (qemuDomainObjEndJob(vm) > 0) virDomainRemoveInactive(&driver->domains, @@ -3693,7 +1328,7 @@ static int qemudDomainSuspend(virDomainPtr dom) { goto endjob; } if (vm->state != VIR_DOMAIN_PAUSED) { - if (doStopCPUs(driver, vm) < 0) { + if (qemuProcessStopCPUs(driver, vm) < 0) { goto endjob; } event = virDomainEventNewFromObj(vm, @@ -3746,7 +1381,7 @@ static int qemudDomainResume(virDomainPtr dom) { goto endjob; } if (vm->state == VIR_DOMAIN_PAUSED) { - if (doStartCPUs(driver, vm, dom->conn) < 0) { + if (qemuProcessStartCPUs(driver, vm, dom->conn) < 0) { if (virGetLastError() == NULL) qemuReportError(VIR_ERR_OPERATION_FAILED, "%s", _("resume operation failed")); @@ -3841,7 +1476,7 @@ static int qemudDomainDestroy(virDomainPtr dom) { goto endjob; } - qemudShutdownVMDaemon(driver, vm, 0); + qemuProcessStop(driver, vm, 0); event = virDomainEventNewFromObj(vm, VIR_DOMAIN_EVENT_STOPPED, VIR_DOMAIN_EVENT_STOPPED_DESTROYED); @@ -4065,7 +1700,7 @@ qemuDomainMigrateOffline(struct qemud_driver *driver, { int ret; - ret = doStopCPUs(driver, vm); + ret = qemuProcessStopCPUs(driver, vm); if (ret == 0) { virDomainEventPtr event; @@ -4336,7 +1971,7 @@ static int qemudDomainSaveFlag(struct qemud_driver *driver, virDomainPtr dom, /* Pause */ if (vm->state == VIR_DOMAIN_RUNNING) { header.was_running = 1; - if (doStopCPUs(driver, vm) < 0) + if (qemuProcessStopCPUs(driver, vm) < 0) goto endjob; if (!virDomainObjIsActive(vm)) { @@ -4546,7 +2181,7 @@ static int qemudDomainSaveFlag(struct qemud_driver *driver, virDomainPtr dom, ret = 0; /* Shut it down */ - qemudShutdownVMDaemon(driver, vm, 0); + qemuProcessStop(driver, vm, 0); qemuDomainStopAudit(vm, "saved"); event = virDomainEventNewFromObj(vm, VIR_DOMAIN_EVENT_STOPPED, @@ -4562,7 +2197,7 @@ endjob: if (vm) { if (ret != 0) { if (header.was_running && virDomainObjIsActive(vm)) { - rc = doStartCPUs(driver, vm, dom->conn); + rc = qemuProcessStartCPUs(driver, vm, dom->conn); if (rc < 0) VIR_WARN0("Unable to resume guest CPUs after save failure"); } @@ -4916,7 +2551,7 @@ static int qemudDomainCoreDump(virDomainPtr dom, /* Pause domain for non-live dump */ if (!(flags & VIR_DUMP_LIVE) && vm->state == VIR_DOMAIN_RUNNING) { - if (doStopCPUs(driver, vm) < 0) + if (qemuProcessStopCPUs(driver, vm) < 0) goto endjob; paused = 1; @@ -4935,7 +2570,7 @@ static int qemudDomainCoreDump(virDomainPtr dom, endjob: if ((ret 
== 0) && (flags & VIR_DUMP_CRASH)) { - qemudShutdownVMDaemon(driver, vm, 0); + qemuProcessStop(driver, vm, 0); qemuDomainStopAudit(vm, "crashed"); event = virDomainEventNewFromObj(vm, VIR_DOMAIN_EVENT_STOPPED, @@ -4946,7 +2581,7 @@ endjob: will support synchronous operations so we always get here after the migration is complete. */ else if (resume && paused && virDomainObjIsActive(vm)) { - if (doStartCPUs(driver, vm, dom->conn) < 0) { + if (qemuProcessStartCPUs(driver, vm, dom->conn) < 0) { if (virGetLastError() == NULL) qemuReportError(VIR_ERR_OPERATION_FAILED, "%s", _("resuming after dump failed")); @@ -4973,7 +2608,7 @@ cleanup: static void processWatchdogEvent(void *data, void *opaque) { int ret; - struct watchdogEvent *wdEvent = data; + struct qemuDomainWatchdogEvent *wdEvent = data; struct qemud_driver *driver = opaque; switch (wdEvent->action) { @@ -5007,7 +2642,7 @@ static void processWatchdogEvent(void *data, void *opaque) qemuReportError(VIR_ERR_OPERATION_FAILED, "%s", _("Dump failed")); - ret = doStartCPUs(driver, wdEvent->vm, NULL); + ret = qemuProcessStartCPUs(driver, wdEvent->vm, NULL); if (ret < 0) qemuReportError(VIR_ERR_OPERATION_FAILED, @@ -5797,8 +3432,8 @@ qemudDomainSaveImageStartVM(virConnectPtr conn, } /* Set the migration source and start it up. */ - ret = qemudStartVMDaemon(conn, driver, vm, "stdio", true, fd, path, - VIR_VM_OP_RESTORE); + ret = qemuProcessStart(conn, driver, vm, "stdio", true, fd, path, + VIR_VM_OP_RESTORE); if (intermediate_pid != -1) { if (ret < 0) { @@ -5857,7 +3492,7 @@ qemudDomainSaveImageStartVM(virConnectPtr conn, /* If it was running before, resume it now. */ if (header->was_running) { - if (doStartCPUs(driver, vm, conn) < 0) { + if (qemuProcessStartCPUs(driver, vm, conn) < 0) { if (virGetLastError() == NULL) qemuReportError(VIR_ERR_OPERATION_FAILED, "%s", _("failed to resume domain")); @@ -6157,7 +3792,7 @@ static char *qemuDomainXMLToNative(virConnectPtr conn, &qemuCmdFlags) < 0) goto cleanup; - if (qemuPrepareMonitorChr(driver, &monConfig, def->name) < 0) + if (qemuProcessPrepareMonitorChr(driver, &monConfig, def->name) < 0) goto cleanup; if (!(cmd = qemuBuildCommandLine(conn, driver, def, @@ -6223,8 +3858,8 @@ static int qemudDomainObjStart(virConnectPtr conn, goto cleanup; } - ret = qemudStartVMDaemon(conn, driver, vm, NULL, start_paused, -1, NULL, - VIR_VM_OP_CREATE); + ret = qemuProcessStart(conn, driver, vm, NULL, start_paused, -1, NULL, + VIR_VM_OP_CREATE); qemuDomainStartAudit(vm, "booted", ret >= 0); if (ret >= 0) { virDomainEventPtr event = @@ -6411,7 +4046,7 @@ static virDomainPtr qemudDomainDefine(virConnectPtr conn, const char *xml) { if (qemudCanonicalizeMachine(driver, def) < 0) goto cleanup; - if (qemuAssignPCIAddresses(def) < 0) + if (qemuDomainAssignPCIAddresses(def) < 0) goto cleanup; if (!(vm = virDomainAssignDef(driver->caps, @@ -7915,60 +5550,6 @@ qemuDomainEventDeregisterAny(virConnectPtr conn, } -static void qemuDomainEventDispatchFunc(virConnectPtr conn, - virDomainEventPtr event, - virConnectDomainEventGenericCallback cb, - void *cbopaque, - void *opaque) -{ - struct qemud_driver *driver = opaque; - - /* Drop the lock while dispatching, for sake of re-entrancy */ - qemuDriverUnlock(driver); - virDomainEventDispatchDefaultFunc(conn, event, cb, cbopaque, NULL); - qemuDriverLock(driver); -} - -static void qemuDomainEventFlush(int timer ATTRIBUTE_UNUSED, void *opaque) -{ - struct qemud_driver *driver = opaque; - virDomainEventQueue tempQueue; - - qemuDriverLock(driver); - - driver->domainEventDispatching = 1; - - /*
Copy the queue, so we're reentrant safe */ - tempQueue.count = driver->domainEventQueue->count; - tempQueue.events = driver->domainEventQueue->events; - driver->domainEventQueue->count = 0; - driver->domainEventQueue->events = NULL; - - virEventUpdateTimeout(driver->domainEventTimer, -1); - virDomainEventQueueDispatch(&tempQueue, - driver->domainEventCallbacks, - qemuDomainEventDispatchFunc, - driver); - - /* Purge any deleted callbacks */ - virDomainEventCallbackListPurgeMarked(driver->domainEventCallbacks); - - driver->domainEventDispatching = 0; - qemuDriverUnlock(driver); -} - - -/* driver must be locked before calling */ -static void qemuDomainEventQueue(struct qemud_driver *driver, - virDomainEventPtr event) -{ - if (virDomainEventQueuePush(driver->domainEventQueue, - event) < 0) - virDomainEventFree(event); - if (qemu_driver->domainEventQueue->count == 1) - virEventUpdateTimeout(driver->domainEventTimer, 0); -} - /* Migration support. */ static bool ATTRIBUTE_NONNULL(1) @@ -8100,12 +5681,12 @@ qemudDomainMigratePrepareTunnel(virConnectPtr dconn, /* Start the QEMU daemon, with the same command-line arguments plus * -incoming unix:/path/to/file or exec:nc -U /path/to/file */ - internalret = qemudStartVMDaemon(dconn, driver, vm, migrateFrom, true, - -1, NULL, VIR_VM_OP_MIGRATE_IN_START); + internalret = qemuProcessStart(dconn, driver, vm, migrateFrom, true, + -1, NULL, VIR_VM_OP_MIGRATE_IN_START); VIR_FREE(migrateFrom); if (internalret < 0) { qemuDomainStartAudit(vm, "migrated", false); - /* Note that we don't set an error here because qemudStartVMDaemon + /* Note that we don't set an error here because qemuProcessStart * should have already done that. */ if (!vm->persistent) { @@ -8119,7 +5700,7 @@ qemudDomainMigratePrepareTunnel(virConnectPtr dconn, unixfile, false) < 0) { qemuDomainStartAudit(vm, "migrated", false); - qemudShutdownVMDaemon(driver, vm, 0); + qemuProcessStop(driver, vm, 0); if (!vm->persistent) { if (qemuDomainObjEndJob(vm) > 0) virDomainRemoveInactive(&driver->domains, vm); @@ -8346,10 +5927,10 @@ qemudDomainMigratePrepare2 (virConnectPtr dconn, * -incoming tcp:0.0.0.0:port */ snprintf (migrateFrom, sizeof (migrateFrom), "tcp:0.0.0.0:%d", this_port); - if (qemudStartVMDaemon (dconn, driver, vm, migrateFrom, true, - -1, NULL, VIR_VM_OP_MIGRATE_IN_START) < 0) { + if (qemuProcessStart(dconn, driver, vm, migrateFrom, true, + -1, NULL, VIR_VM_OP_MIGRATE_IN_START) < 0) { qemuDomainStartAudit(vm, "migrated", false); - /* Note that we don't set an error here because qemudStartVMDaemon + /* Note that we don't set an error here because qemuProcessStart * should have already done that. */ if (!vm->persistent) { @@ -8931,7 +6512,7 @@ qemudDomainMigratePerform (virDomainPtr dom, } /* Clean up the source domain. */ - qemudShutdownVMDaemon(driver, vm, 1); + qemuProcessStop(driver, vm, 1); qemuDomainStopAudit(vm, "migrated"); resume = 0; @@ -8949,7 +6530,7 @@ qemudDomainMigratePerform (virDomainPtr dom, endjob: if (resume && vm->state == VIR_DOMAIN_PAUSED) { /* we got here through some sort of failure; start the domain again */ - if (doStartCPUs(driver, vm, dom->conn) < 0) { + if (qemuProcessStartCPUs(driver, vm, dom->conn) < 0) { /* Hm, we already know we are in error here. We don't want to * overwrite the previous error, though, so we just throw something * to the logs and hope for the best @@ -9113,7 +6694,7 @@ qemudDomainMigrateFinish2 (virConnectPtr dconn, * >= 0.10.6 to work properly. 
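The qemuDomainEventFlush() logic deleted above relies on one trick worth spelling out: the queue is stolen under the driver lock and dispatched from a private copy, so callbacks may re-enter the driver and queue new events without corrupting the list. In miniature (toy types, not libvirt's):

#include <stdio.h>
#include <stdlib.h>

struct queue { int *events; size_t count; };

static void
flush(struct queue *shared)
{
    struct queue temp = *shared;    /* steal the current entries */
    size_t i;

    shared->events = NULL;          /* shared queue is empty again */
    shared->count = 0;

    /* the real code drops the driver lock here, before callbacks run */
    for (i = 0; i < temp.count; i++)
        printf("dispatch event %d\n", temp.events[i]);
    free(temp.events);
}

int main(void)
{
    struct queue q = { malloc(3 * sizeof(int)), 3 };

    if (!q.events)
        return 1;
    q.events[0] = 10; q.events[1] = 20; q.events[2] = 30;
    flush(&q);
    return 0;
}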
This isn't strictly necessary on * older qemu's, but it also doesn't hurt anything there */ - if (doStartCPUs(driver, vm, dconn) < 0) { + if (qemuProcessStartCPUs(driver, vm, dconn) < 0) { if (virGetLastError() == NULL) qemuReportError(VIR_ERR_INTERNAL_ERROR, "%s", _("resume operation failed")); @@ -9135,7 +6716,7 @@ qemudDomainMigrateFinish2 (virConnectPtr dconn, goto endjob; } } else { - qemudShutdownVMDaemon(driver, vm, 1); + qemuProcessStop(driver, vm, 1); qemuDomainStopAudit(vm, "failed"); event = virDomainEventNewFromObj(vm, VIR_DOMAIN_EVENT_STOPPED, @@ -9964,8 +7545,8 @@ static int qemuDomainRevertToSnapshot(virDomainSnapshotPtr snapshot, if (qemuDomainSnapshotSetCurrentActive(vm, driver->snapshotDir) < 0) goto endjob; - rc = qemudStartVMDaemon(snapshot->domain->conn, driver, vm, NULL, - false, -1, NULL, VIR_VM_OP_CREATE); + rc = qemuProcessStart(snapshot->domain->conn, driver, vm, NULL, + false, -1, NULL, VIR_VM_OP_CREATE); qemuDomainStartAudit(vm, "from-snapshot", rc >= 0); if (qemuDomainSnapshotSetCurrentInactive(vm, driver->snapshotDir) < 0) goto endjob; @@ -9977,7 +7558,7 @@ static int qemuDomainRevertToSnapshot(virDomainSnapshotPtr snapshot, /* qemu unconditionally starts the domain running again after * loadvm, so let's pause it to keep consistency */ - rc = doStopCPUs(driver, vm); + rc = qemuProcessStopCPUs(driver, vm); if (rc < 0) goto endjob; } @@ -9998,7 +7579,7 @@ static int qemuDomainRevertToSnapshot(virDomainSnapshotPtr snapshot, */ if (virDomainObjIsActive(vm)) { - qemudShutdownVMDaemon(driver, vm, 0); + qemuProcessStop(driver, vm, 0); qemuDomainStopAudit(vm, "from-snapshot"); event = virDomainEventNewFromObj(vm, VIR_DOMAIN_EVENT_STOPPED, @@ -10499,56 +8080,32 @@ static virStateDriver qemuStateDriver = { .active = qemudActive, }; -static int -qemudVMFilterRebuild(virConnectPtr conn ATTRIBUTE_UNUSED, - virHashIterator iter, void *data) -{ - virHashForEach(qemu_driver->domains.objs, iter, data); - - return 0; -} - -static int -qemudVMFiltersInstantiate(virConnectPtr conn, - virDomainDefPtr def) -{ - int err = 0; - int i; - - if (!conn) - return 1; - - for (i = 0 ; i < def->nnets ; i++) { - virDomainNetDefPtr net = def->nets[i]; - if ((net->filter) && (net->ifname)) { - if (virDomainConfNWFilterInstantiate(conn, net)) { - err = 1; - break; - } - } - } - - return err; -} - - static void -qemudVMDriverLock(void) { +qemuVMDriverLock(void) { qemuDriverLock(qemu_driver); }; static void -qemudVMDriverUnlock(void) { +qemuVMDriverUnlock(void) { qemuDriverUnlock(qemu_driver); }; +static int +qemuVMFilterRebuild(virConnectPtr conn ATTRIBUTE_UNUSED, + virHashIterator iter, void *data) +{ + virHashForEach(qemu_driver->domains.objs, iter, data); + + return 0; +} + static virNWFilterCallbackDriver qemuCallbackDriver = { .name = "QEMU", - .vmFilterRebuild = qemudVMFilterRebuild, - .vmDriverLock = qemudVMDriverLock, - .vmDriverUnlock = qemudVMDriverUnlock, + .vmFilterRebuild = qemuVMFilterRebuild, + .vmDriverLock = qemuVMDriverLock, + .vmDriverUnlock = qemuVMDriverUnlock, }; int qemuRegister(void) { diff --git a/src/qemu/qemu_driver.h b/src/qemu/qemu_driver.h index dac0935..73da9e4 100644 --- a/src/qemu/qemu_driver.h +++ b/src/qemu/qemu_driver.h @@ -21,34 +21,9 @@ * Author: Daniel P. 
Berrange <berrange@redhat.com> */ - -#ifndef QEMUD_DRIVER_H -# define QEMUD_DRIVER_H - -# include <config.h> - -# include <libxml/xpath.h> - -# include "internal.h" - -# if HAVE_LINUX_KVM_H -# include <linux/kvm.h> -# endif - -/* device for kvm ioctls */ -# define KVM_DEVICE "/dev/kvm" - -/* add definitions missing in older linux/kvm.h */ -# ifndef KVMIO -# define KVMIO 0xAE -# endif -# ifndef KVM_CHECK_EXTENSION -# define KVM_CHECK_EXTENSION _IO(KVMIO, 0x03) -# endif -# ifndef KVM_CAP_NR_VCPUS -# define KVM_CAP_NR_VCPUS 9 /* returns max vcpus per vm */ -# endif +#ifndef __QEMU_DRIVER_H__ +# define __QEMU_DRIVER_H__ int qemuRegister(void); -#endif /* QEMUD_DRIVER_H */ +#endif /* __QEMU_DRIVER_H__ */ diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c new file mode 100644 index 0000000..7061fc2 --- /dev/null +++ b/src/qemu/qemu_process.c @@ -0,0 +1,2417 @@ +/* + * qemu_process.c: QEMU process management + * + * Copyright (C) 2006-2011 Red Hat, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#include <config.h> + +#include <fcntl.h> +#include <unistd.h> +#include <signal.h> +#include <sys/stat.h> + +#include "qemu_process.h" +#include "qemu_domain.h" +#include "qemu_cgroup.h" +#include "qemu_capabilities.h" +#include "qemu_monitor.h" +#include "qemu_command.h" +#include "qemu_audit.h" +#include "qemu_hostdev.h" +#include "qemu_hotplug.h" +#include "qemu_bridge_filter.h" + +#include "datatypes.h" +#include "logging.h" +#include "virterror_internal.h" +#include "memory.h" +#include "hooks.h" +#include "files.h" +#include "util.h" +#include "c-ctype.h" +#include "nodeinfo.h" +#include "processinfo.h" +#include "domain_nwfilter.h" + +#define VIR_FROM_THIS VIR_FROM_QEMU + +#define START_POSTFIX ": starting up\n" +#define SHUTDOWN_POSTFIX ": shutting down\n" + +/** + * qemuProcessRemoveDomainStatus + * + * Remove all state files of a domain from statedir + * + * Returns 0 on success + */ +static int +qemuProcessRemoveDomainStatus(struct qemud_driver *driver, + virDomainObjPtr vm) +{ + char ebuf[1024]; + char *file = NULL; + + if (virAsprintf(&file, "%s/%s.xml", driver->stateDir, vm->def->name) < 0) { + virReportOOMError(); + return -1; + } + + if (unlink(file) < 0 && errno != ENOENT && errno != ENOTDIR) + VIR_WARN("Failed to remove domain XML for %s: %s", + vm->def->name, virStrerror(errno, ebuf, sizeof(ebuf))); + VIR_FREE(file); + + if (virFileDeletePid(driver->stateDir, vm->def->name) != 0) + VIR_WARN("Failed to remove PID file for %s: %s", + vm->def->name, virStrerror(errno, ebuf, sizeof(ebuf))); + + + return 0; +} + + +/* XXX figure out how to remove this */ +extern struct qemud_driver *qemu_driver; + +/* + * This is a callback registered with a qemuMonitorPtr instance, + * and to be invoked when the monitor console hits an end of file + * condition, or error, thus indicating VM
shutdown should be + * performed + */ +static void +qemuProcessHandleMonitorEOF(qemuMonitorPtr mon ATTRIBUTE_UNUSED, + virDomainObjPtr vm, + int hasError) +{ + struct qemud_driver *driver = qemu_driver; + virDomainEventPtr event = NULL; + qemuDomainObjPrivatePtr priv; + + VIR_DEBUG("Received EOF on %p '%s'", vm, vm->def->name); + + virDomainObjLock(vm); + + if (!virDomainObjIsActive(vm)) { + VIR_DEBUG("Domain %p is not active, ignoring EOF", vm); + virDomainObjUnlock(vm); + return; + } + + priv = vm->privateData; + if (!hasError && priv->monJSON && !priv->gotShutdown) { + VIR_DEBUG("Monitor connection to '%s' closed without SHUTDOWN event; " + "assuming the domain crashed", vm->def->name); + hasError = 1; + } + + event = virDomainEventNewFromObj(vm, + VIR_DOMAIN_EVENT_STOPPED, + hasError ? + VIR_DOMAIN_EVENT_STOPPED_FAILED : + VIR_DOMAIN_EVENT_STOPPED_SHUTDOWN); + + qemuProcessStop(driver, vm, 0); + qemuDomainStopAudit(vm, hasError ? "failed" : "shutdown"); + + if (!vm->persistent) + virDomainRemoveInactive(&driver->domains, vm); + else + virDomainObjUnlock(vm); + + if (event) { + qemuDriverLock(driver); + qemuDomainEventQueue(driver, event); + qemuDriverUnlock(driver); + } +} + + +static virDomainDiskDefPtr +qemuProcessFindDomainDiskByPath(virDomainObjPtr vm, + const char *path) +{ + int i; + + for (i = 0; i < vm->def->ndisks; i++) { + virDomainDiskDefPtr disk; + + disk = vm->def->disks[i]; + if (disk->src != NULL && STREQ(disk->src, path)) + return disk; + } + + qemuReportError(VIR_ERR_INTERNAL_ERROR, + _("no disk found with path %s"), + path); + return NULL; +} + +static virDomainDiskDefPtr +qemuProcessFindDomainDiskByAlias(virDomainObjPtr vm, + const char *alias) +{ + int i; + + if (STRPREFIX(alias, QEMU_DRIVE_HOST_PREFIX)) + alias += strlen(QEMU_DRIVE_HOST_PREFIX); + + for (i = 0; i < vm->def->ndisks; i++) { + virDomainDiskDefPtr disk; + + disk = vm->def->disks[i]; + if (disk->info.alias != NULL && STREQ(disk->info.alias, alias)) + return disk; + } + + qemuReportError(VIR_ERR_INTERNAL_ERROR, + _("no disk found with alias %s"), + alias); + return NULL; +} + +static int +qemuProcessGetVolumeQcowPassphrase(virConnectPtr conn, + virDomainDiskDefPtr disk, + char **secretRet, + size_t *secretLen) +{ + virSecretPtr secret; + char *passphrase; + unsigned char *data; + size_t size; + int ret = -1; + virStorageEncryptionPtr enc; + + if (!disk->encryption) { + qemuReportError(VIR_ERR_INTERNAL_ERROR, + _("disk %s does not have any encryption information"), + disk->src); + return -1; + } + enc = disk->encryption; + + if (!conn) { + qemuReportError(VIR_ERR_INTERNAL_ERROR, + "%s", _("cannot find secrets without a connection")); + goto cleanup; + } + + if (conn->secretDriver == NULL || + conn->secretDriver->lookupByUUID == NULL || + conn->secretDriver->getValue == NULL) { + qemuReportError(VIR_ERR_NO_SUPPORT, "%s", + _("secret storage not supported")); + goto cleanup; + } + + if (enc->format != VIR_STORAGE_ENCRYPTION_FORMAT_QCOW || + enc->nsecrets != 1 || + enc->secrets[0]->type != + VIR_STORAGE_ENCRYPTION_SECRET_TYPE_PASSPHRASE) { + qemuReportError(VIR_ERR_XML_ERROR, + _("invalid <encryption> for volume %s"), disk->src); + goto cleanup; + } + + secret = conn->secretDriver->lookupByUUID(conn, + enc->secrets[0]->uuid); + if (secret == NULL) + goto cleanup; + data = conn->secretDriver->getValue(secret, &size, + VIR_SECRET_GET_VALUE_INTERNAL_CALL); + virUnrefSecret(secret); + if (data == NULL) + goto cleanup; + + if (memchr(data, '\0', size) != NULL) { + memset(data, 0, size); + VIR_FREE(data); + 
qemuReportError(VIR_ERR_XML_ERROR, + _("format='qcow' passphrase for %s must not contain a " + "'\\0'"), disk->src); + goto cleanup; + } + + if (VIR_ALLOC_N(passphrase, size + 1) < 0) { + memset(data, 0, size); + VIR_FREE(data); + virReportOOMError(); + goto cleanup; + } + memcpy(passphrase, data, size); + passphrase[size] = '\0'; + + memset(data, 0, size); + VIR_FREE(data); + + *secretRet = passphrase; + *secretLen = size; + + ret = 0; + +cleanup: + return ret; +} + +static int +qemuProcessFindVolumeQcowPassphrase(qemuMonitorPtr mon ATTRIBUTE_UNUSED, + virConnectPtr conn, + virDomainObjPtr vm, + const char *path, + char **secretRet, + size_t *secretLen) +{ + virDomainDiskDefPtr disk; + int ret = -1; + + virDomainObjLock(vm); + disk = qemuProcessFindDomainDiskByPath(vm, path); + + if (!disk) + goto cleanup; + + ret = qemuProcessGetVolumeQcowPassphrase(conn, disk, secretRet, secretLen); + +cleanup: + virDomainObjUnlock(vm); + return ret; +} + + +static int +qemuProcessHandleReset(qemuMonitorPtr mon ATTRIBUTE_UNUSED, + virDomainObjPtr vm) +{ + struct qemud_driver *driver = qemu_driver; + virDomainEventPtr event; + + virDomainObjLock(vm); + event = virDomainEventRebootNewFromObj(vm); + virDomainObjUnlock(vm); + + if (event) { + qemuDriverLock(driver); + qemuDomainEventQueue(driver, event); + qemuDriverUnlock(driver); + } + + return 0; +} + + +static int +qemuProcessHandleShutdown(qemuMonitorPtr mon ATTRIBUTE_UNUSED, + virDomainObjPtr vm) +{ + virDomainObjLock(vm); + ((qemuDomainObjPrivatePtr) vm->privateData)->gotShutdown = true; + virDomainObjUnlock(vm); + + return 0; +} + + +static int +qemuProcessHandleStop(qemuMonitorPtr mon ATTRIBUTE_UNUSED, + virDomainObjPtr vm) +{ + struct qemud_driver *driver = qemu_driver; + virDomainEventPtr event = NULL; + + virDomainObjLock(vm); + if (vm->state == VIR_DOMAIN_RUNNING) { + VIR_DEBUG("Transitioned guest %s to paused state due to unknown event", vm->def->name); + + vm->state = VIR_DOMAIN_PAUSED; + event = virDomainEventNewFromObj(vm, + VIR_DOMAIN_EVENT_SUSPENDED, + VIR_DOMAIN_EVENT_SUSPENDED_PAUSED); + + if (virDomainSaveStatus(driver->caps, driver->stateDir, vm) < 0) + VIR_WARN("Unable to save status on vm %s after IO error", vm->def->name); + } + virDomainObjUnlock(vm); + + if (event) { + qemuDriverLock(driver); + if (event) + qemuDomainEventQueue(driver, event); + qemuDriverUnlock(driver); + } + + return 0; +} + + +static int +qemuProcessHandleRTCChange(qemuMonitorPtr mon ATTRIBUTE_UNUSED, + virDomainObjPtr vm, + long long offset) +{ + struct qemud_driver *driver = qemu_driver; + virDomainEventPtr event; + + virDomainObjLock(vm); + event = virDomainEventRTCChangeNewFromObj(vm, offset); + + if (vm->def->clock.offset == VIR_DOMAIN_CLOCK_OFFSET_VARIABLE) + vm->def->clock.data.adjustment = offset; + + if (virDomainSaveStatus(driver->caps, driver->stateDir, vm) < 0) + VIR_WARN0("unable to save domain status with RTC change"); + + virDomainObjUnlock(vm); + + if (event) { + qemuDriverLock(driver); + qemuDomainEventQueue(driver, event); + qemuDriverUnlock(driver); + } + + return 0; +} + + +static int +qemuProcessHandleWatchdog(qemuMonitorPtr mon ATTRIBUTE_UNUSED, + virDomainObjPtr vm, + int action) +{ + struct qemud_driver *driver = qemu_driver; + virDomainEventPtr watchdogEvent = NULL; + virDomainEventPtr lifecycleEvent = NULL; + + virDomainObjLock(vm); + watchdogEvent = virDomainEventWatchdogNewFromObj(vm, action); + + if (action == VIR_DOMAIN_EVENT_WATCHDOG_PAUSE && + vm->state == VIR_DOMAIN_RUNNING) { + VIR_DEBUG("Transitioned guest %s to paused state 
due to watchdog", vm->def->name); + + vm->state = VIR_DOMAIN_PAUSED; + lifecycleEvent = virDomainEventNewFromObj(vm, + VIR_DOMAIN_EVENT_SUSPENDED, + VIR_DOMAIN_EVENT_SUSPENDED_WATCHDOG); + + if (virDomainSaveStatus(driver->caps, driver->stateDir, vm) < 0) + VIR_WARN("Unable to save status on vm %s after IO error", vm->def->name); + } + + if (vm->def->watchdog->action == VIR_DOMAIN_WATCHDOG_ACTION_DUMP) { + struct qemuDomainWatchdogEvent *wdEvent; + if (VIR_ALLOC(wdEvent) == 0) { + wdEvent->action = VIR_DOMAIN_WATCHDOG_ACTION_DUMP; + wdEvent->vm = vm; + ignore_value(virThreadPoolSendJob(driver->workerPool, wdEvent)); + } else + virReportOOMError(); + } + + virDomainObjUnlock(vm); + + if (watchdogEvent || lifecycleEvent) { + qemuDriverLock(driver); + if (watchdogEvent) + qemuDomainEventQueue(driver, watchdogEvent); + if (lifecycleEvent) + qemuDomainEventQueue(driver, lifecycleEvent); + qemuDriverUnlock(driver); + } + + return 0; +} + + +static int +qemuProcessHandleIOError(qemuMonitorPtr mon ATTRIBUTE_UNUSED, + virDomainObjPtr vm, + const char *diskAlias, + int action, + const char *reason) +{ + struct qemud_driver *driver = qemu_driver; + virDomainEventPtr ioErrorEvent = NULL; + virDomainEventPtr ioErrorEvent2 = NULL; + virDomainEventPtr lifecycleEvent = NULL; + const char *srcPath; + const char *devAlias; + virDomainDiskDefPtr disk; + + virDomainObjLock(vm); + disk = qemuProcessFindDomainDiskByAlias(vm, diskAlias); + + if (disk) { + srcPath = disk->src; + devAlias = disk->info.alias; + } else { + srcPath = ""; + devAlias = ""; + } + + ioErrorEvent = virDomainEventIOErrorNewFromObj(vm, srcPath, devAlias, action); + ioErrorEvent2 = virDomainEventIOErrorReasonNewFromObj(vm, srcPath, devAlias, action, reason); + + if (action == VIR_DOMAIN_EVENT_IO_ERROR_PAUSE && + vm->state == VIR_DOMAIN_RUNNING) { + VIR_DEBUG("Transitioned guest %s to paused state due to IO error", vm->def->name); + + vm->state = VIR_DOMAIN_PAUSED; + lifecycleEvent = virDomainEventNewFromObj(vm, + VIR_DOMAIN_EVENT_SUSPENDED, + VIR_DOMAIN_EVENT_SUSPENDED_IOERROR); + + if (virDomainSaveStatus(driver->caps, driver->stateDir, vm) < 0) + VIR_WARN("Unable to save status on vm %s after IO error", vm->def->name); + } + virDomainObjUnlock(vm); + + if (ioErrorEvent || ioErrorEvent2 || lifecycleEvent) { + qemuDriverLock(driver); + if (ioErrorEvent) + qemuDomainEventQueue(driver, ioErrorEvent); + if (ioErrorEvent2) + qemuDomainEventQueue(driver, ioErrorEvent2); + if (lifecycleEvent) + qemuDomainEventQueue(driver, lifecycleEvent); + qemuDriverUnlock(driver); + } + + return 0; +} + + +static int +qemuProcessHandleGraphics(qemuMonitorPtr mon ATTRIBUTE_UNUSED, + virDomainObjPtr vm, + int phase, + int localFamily, + const char *localNode, + const char *localService, + int remoteFamily, + const char *remoteNode, + const char *remoteService, + const char *authScheme, + const char *x509dname, + const char *saslUsername) +{ + struct qemud_driver *driver = qemu_driver; + virDomainEventPtr event; + virDomainEventGraphicsAddressPtr localAddr = NULL; + virDomainEventGraphicsAddressPtr remoteAddr = NULL; + virDomainEventGraphicsSubjectPtr subject = NULL; + int i; + + virDomainObjLock(vm); + + if (VIR_ALLOC(localAddr) < 0) + goto no_memory; + localAddr->family = localFamily; + if (!(localAddr->service = strdup(localService)) || + !(localAddr->node = strdup(localNode))) + goto no_memory; + + if (VIR_ALLOC(remoteAddr) < 0) + goto no_memory; + remoteAddr->family = remoteFamily; + if (!(remoteAddr->service = strdup(remoteService)) || + !(remoteAddr->node = 
strdup(remoteNode))) + goto no_memory; + + if (VIR_ALLOC(subject) < 0) + goto no_memory; + if (x509dname) { + if (VIR_REALLOC_N(subject->identities, subject->nidentity+1) < 0) + goto no_memory; + if (!(subject->identities[subject->nidentity].type = strdup("x509dname")) || + !(subject->identities[subject->nidentity].name = strdup(x509dname))) + goto no_memory; + subject->nidentity++; + } + if (saslUsername) { + if (VIR_REALLOC_N(subject->identities, subject->nidentity+1) < 0) + goto no_memory; + if (!(subject->identities[subject->nidentity].type = strdup("saslUsername")) || + !(subject->identities[subject->nidentity].name = strdup(saslUsername))) + goto no_memory; + subject->nidentity++; + } + + event = virDomainEventGraphicsNewFromObj(vm, phase, localAddr, remoteAddr, authScheme, subject); + virDomainObjUnlock(vm); + + if (event) { + qemuDriverLock(driver); + qemuDomainEventQueue(driver, event); + qemuDriverUnlock(driver); + } + + return 0; + +no_memory: + virReportOOMError(); + if (localAddr) { + VIR_FREE(localAddr->service); + VIR_FREE(localAddr->node); + VIR_FREE(localAddr); + } + if (remoteAddr) { + VIR_FREE(remoteAddr->service); + VIR_FREE(remoteAddr->node); + VIR_FREE(remoteAddr); + } + if (subject) { + for (i = 0 ; i < subject->nidentity ; i++) { + VIR_FREE(subject->identities[i].type); + VIR_FREE(subject->identities[i].name); + } + VIR_FREE(subject->identities); + VIR_FREE(subject); + } + + return -1; +} + + +static void qemuProcessHandleMonitorDestroy(qemuMonitorPtr mon, + virDomainObjPtr vm) +{ + qemuDomainObjPrivatePtr priv = vm->privateData; + if (priv->mon == mon) + priv->mon = NULL; + virDomainObjUnref(vm); +} + +static qemuMonitorCallbacks monitorCallbacks = { + .destroy = qemuProcessHandleMonitorDestroy, + .eofNotify = qemuProcessHandleMonitorEOF, + .diskSecretLookup = qemuProcessFindVolumeQcowPassphrase, + .domainShutdown = qemuProcessHandleShutdown, + .domainStop = qemuProcessHandleStop, + .domainReset = qemuProcessHandleReset, + .domainRTCChange = qemuProcessHandleRTCChange, + .domainWatchdog = qemuProcessHandleWatchdog, + .domainIOError = qemuProcessHandleIOError, + .domainGraphics = qemuProcessHandleGraphics, +}; + +static int +qemuConnectMonitor(struct qemud_driver *driver, virDomainObjPtr vm) +{ + qemuDomainObjPrivatePtr priv = vm->privateData; + int ret = -1; + + if (virSecurityManagerSetSocketLabel(driver->securityManager, vm) < 0) { + VIR_ERROR(_("Failed to set security context for monitor for %s"), + vm->def->name); + goto error; + } + + /* Hold an extra reference because we can't allow 'vm' to be + * deleted while the monitor is active */ + virDomainObjRef(vm); + + priv->mon = qemuMonitorOpen(vm, + priv->monConfig, + priv->monJSON, + &monitorCallbacks); + + if (priv->mon == NULL) + virDomainObjUnref(vm); + + if (virSecurityManagerClearSocketLabel(driver->securityManager, vm) < 0) { + VIR_ERROR(_("Failed to clear security context for monitor for %s"), + vm->def->name); + goto error; + } + + if (priv->mon == NULL) { + VIR_INFO("Failed to connect monitor for %s", vm->def->name); + goto error; + } + + + qemuDomainObjEnterMonitorWithDriver(driver, vm); + ret = qemuMonitorSetCapabilities(priv->mon); + qemuDomainObjExitMonitorWithDriver(driver, vm); + +error: + + return ret; +} + +static int +qemuProcessLogFD(struct qemud_driver *driver, const char* name, bool append) +{ + char *logfile; + mode_t logmode; + int fd = -1; + + if (virAsprintf(&logfile, "%s/%s.log", driver->logDir, name) < 0) { + virReportOOMError(); + return -1; + } + + logmode = O_CREAT | O_WRONLY; + /* 
Only logrotate files in /var/log, so only append if running privileged */ + if (driver->privileged || append) + logmode |= O_APPEND; + else + logmode |= O_TRUNC; + + if ((fd = open(logfile, logmode, S_IRUSR | S_IWUSR)) < 0) { + virReportSystemError(errno, + _("failed to create logfile %s"), + logfile); + VIR_FREE(logfile); + return -1; + } + VIR_FREE(logfile); + if (virSetCloseExec(fd) < 0) { + virReportSystemError(errno, "%s", + _("Unable to set VM logfile close-on-exec flag")); + VIR_FORCE_CLOSE(fd); + return -1; + } + return fd; +} + + +static int +qemuProcessLogReadFD(const char* logDir, const char* name, off_t pos) +{ + char *logfile; + int logmode = O_RDONLY; + int fd = -1; + + if (virAsprintf(&logfile, "%s/%s.log", logDir, name) < 0) { + qemuReportError(VIR_ERR_INTERNAL_ERROR, + _("failed to build logfile name %s/%s.log"), + logDir, name); + return -1; + } + + if ((fd = open(logfile, logmode)) < 0) { + virReportSystemError(errno, + _("failed to open logfile %s"), + logfile); + VIR_FREE(logfile); + return -1; + } + if (virSetCloseExec(fd) < 0) { + virReportSystemError(errno, "%s", + _("Unable to set VM logfile close-on-exec flag")); + VIR_FORCE_CLOSE(fd); + VIR_FREE(logfile); + return -1; + } + if (pos < 0 || lseek(fd, pos, SEEK_SET) < 0) { + virReportSystemError(pos < 0 ? 0 : errno, + _("Unable to seek to %lld in %s"), + (long long) pos, logfile); + VIR_FORCE_CLOSE(fd); + } + VIR_FREE(logfile); + return fd; +} + + +typedef int qemuProcessLogHandleOutput(virDomainObjPtr vm, + const char *output, + int fd); + +/* + * Returns -1 for error, 0 on success + */ +static int +qemuProcessReadLogOutput(virDomainObjPtr vm, + int fd, + char *buf, + size_t buflen, + qemuProcessLogHandleOutput func, + const char *what, + int timeout) +{ + int retries = (timeout*10); + int got = 0; + buf[0] = '\0'; + + while (retries) { + ssize_t func_ret, ret; + int isdead = 0; + + func_ret = func(vm, buf, fd); + + if (kill(vm->pid, 0) == -1 && errno == ESRCH) + isdead = 1; + + /* Any failures should be detected before we read the log, so we + * always have something useful to report on failure. 
*/ + ret = saferead(fd, buf+got, buflen-got-1); + if (ret < 0) { + virReportSystemError(errno, + _("Failure while reading %s log output"), + what); + return -1; + } + + got += ret; + buf[got] = '\0'; + if (got == buflen-1) { + qemuReportError(VIR_ERR_INTERNAL_ERROR, + _("Out of space while reading %s log output: %s"), + what, buf); + return -1; + } + + if (isdead) { + qemuReportError(VIR_ERR_INTERNAL_ERROR, + _("Process exited while reading %s log output: %s"), + what, buf); + return -1; + } + + if (func_ret <= 0) + return func_ret; + + usleep(100*1000); + retries--; + } + + qemuReportError(VIR_ERR_INTERNAL_ERROR, + _("Timed out while reading %s log output: %s"), + what, buf); + return -1; +} + + +/* + * Look at a chunk of data from the QEMU stdout logs and try to + * find a TTY device, as indicated by a line like + * + * char device redirected to /dev/pts/3 + * + * Returns -1 for error, 0 success, 1 continue reading + */ +static int +qemuProcessExtractTTYPath(const char *haystack, + size_t *offset, + char **path) +{ + static const char needle[] = "char device redirected to"; + char *tmp, *dev; + + VIR_FREE(*path); + /* First look for our magic string */ + if (!(tmp = strstr(haystack + *offset, needle))) { + return 1; + } + tmp += sizeof(needle); + dev = tmp; + + /* + * And look for first whitespace character and nul terminate + * to mark end of the pty path + */ + while (*tmp) { + if (c_isspace(*tmp)) { + *path = strndup(dev, tmp-dev); + if (*path == NULL) { + virReportOOMError(); + return -1; + } + + /* ... now further update offset till we get EOL */ + *offset = tmp - haystack; + return 0; + } + tmp++; + } + + /* + * We found a path, but didn't find any whitespace, + * so it must be still incomplete - we should at + * least see a \n - indicate that we want to carry + * on trying again + */ + return 1; +} + +static int +qemuProcessFindCharDevicePTYsMonitor(virDomainObjPtr vm, + virHashTablePtr paths) +{ + int i; + +#define LOOKUP_PTYS(array, arraylen, idprefix) \ + for (i = 0 ; i < (arraylen) ; i++) { \ + virDomainChrDefPtr chr = (array)[i]; \ + if (chr->source.type == VIR_DOMAIN_CHR_TYPE_PTY) { \ + char id[16]; \ + \ + if (snprintf(id, sizeof(id), idprefix "%i", i) >= sizeof(id)) \ + return -1; \ + \ + const char *path = (const char *) virHashLookup(paths, id); \ + if (path == NULL) { \ + if (chr->source.data.file.path == NULL) { \ + /* neither the log output nor 'info chardev' had a */ \ + /* pty path for this chardev, report an error */ \ + qemuReportError(VIR_ERR_INTERNAL_ERROR, \ + _("no assigned pty for device %s"), id); \ + return -1; \ + } else { \ + /* 'info chardev' had no pty path for this chardev, */\ + /* but the log output had, so we're fine */ \ + continue; \ + } \ + } \ + \ + VIR_FREE(chr->source.data.file.path); \ + chr->source.data.file.path = strdup(path); \ + \ + if (chr->source.data.file.path == NULL) { \ + virReportOOMError(); \ + return -1; \ + } \ + } \ + } + + LOOKUP_PTYS(vm->def->serials, vm->def->nserials, "serial"); + LOOKUP_PTYS(vm->def->parallels, vm->def->nparallels, "parallel"); + LOOKUP_PTYS(vm->def->channels, vm->def->nchannels, "channel"); + if (vm->def->console) + LOOKUP_PTYS(&vm->def->console, 1, "console"); +#undef LOOKUP_PTYS + + return 0; +} + +static int +qemuProcessFindCharDevicePTYs(virDomainObjPtr vm, + const char *output, + int fd ATTRIBUTE_UNUSED) +{ + size_t offset = 0; + int ret, i; + + /* The order in which QEMU prints out the PTY paths is + the order in which it processes its serial and parallel + device args. 
This code must match that ordering.... */ + + /* first comes the serial devices */ + for (i = 0 ; i < vm->def->nserials ; i++) { + virDomainChrDefPtr chr = vm->def->serials[i]; + if (chr->source.type == VIR_DOMAIN_CHR_TYPE_PTY) { + if ((ret = qemuProcessExtractTTYPath(output, &offset, + &chr->source.data.file.path)) != 0) + return ret; + } + } + + /* then the parallel devices */ + for (i = 0 ; i < vm->def->nparallels ; i++) { + virDomainChrDefPtr chr = vm->def->parallels[i]; + if (chr->source.type == VIR_DOMAIN_CHR_TYPE_PTY) { + if ((ret = qemuProcessExtractTTYPath(output, &offset, + &chr->source.data.file.path)) != 0) + return ret; + } + } + + /* then the channel devices */ + for (i = 0 ; i < vm->def->nchannels ; i++) { + virDomainChrDefPtr chr = vm->def->channels[i]; + if (chr->source.type == VIR_DOMAIN_CHR_TYPE_PTY) { + if ((ret = qemuProcessExtractTTYPath(output, &offset, + &chr->source.data.file.path)) != 0) + return ret; + } + } + + return 0; +} + +static void qemuProcessFreePtyPath(void *payload, const char *name ATTRIBUTE_UNUSED) +{ + VIR_FREE(payload); +} + +static void +qemuProcessReadLogFD(int logfd, char *buf, int maxlen, int off) +{ + int ret; + char *tmpbuf = buf + off; + + ret = saferead(logfd, tmpbuf, maxlen - off - 1); + if (ret < 0) { + ret = 0; + } + + tmpbuf[ret] = '\0'; +} + +static int +qemuProcessWaitForMonitor(struct qemud_driver* driver, + virDomainObjPtr vm, off_t pos) +{ + char buf[4096] = ""; /* Plenty of space to get startup greeting */ + int logfd; + int ret = -1; + virHashTablePtr paths = NULL; + + if ((logfd = qemuProcessLogReadFD(driver->logDir, vm->def->name, pos)) < 0) + return -1; + + if (qemuProcessReadLogOutput(vm, logfd, buf, sizeof(buf), + qemuProcessFindCharDevicePTYs, + "console", 30) < 0) + goto closelog; + + VIR_DEBUG("Connect monitor to %p '%s'", vm, vm->def->name); + if (qemuConnectMonitor(driver, vm) < 0) { + goto cleanup; + } + + /* Try to get the pty path mappings again via the monitor. This is much more + * reliable if it's available. + * Note that the monitor itself can be on a pty, so we still need to try the + * log output method. 
*/ + paths = virHashCreate(0); + if (paths == NULL) { + virReportOOMError(); + goto cleanup; + } + + qemuDomainObjEnterMonitorWithDriver(driver, vm); + qemuDomainObjPrivatePtr priv = vm->privateData; + ret = qemuMonitorGetPtyPaths(priv->mon, paths); + qemuDomainObjExitMonitorWithDriver(driver, vm); + + VIR_DEBUG("qemuMonitorGetPtyPaths returned %i", ret); + if (ret == 0) + ret = qemuProcessFindCharDevicePTYsMonitor(vm, paths); + +cleanup: + if (paths) { + virHashFree(paths, qemuProcessFreePtyPath); + } + + if (kill(vm->pid, 0) == -1 && errno == ESRCH) { + /* VM is dead, any other error raised in the interim is probably + * not as important as the qemu cmdline output */ + qemuProcessReadLogFD(logfd, buf, sizeof(buf), strlen(buf)); + qemuReportError(VIR_ERR_INTERNAL_ERROR, + _("process exited while connecting to monitor: %s"), + buf); + ret = -1; + } + +closelog: + if (VIR_CLOSE(logfd) < 0) { + char ebuf[4096]; + VIR_WARN("Unable to close logfile: %s", + virStrerror(errno, ebuf, sizeof ebuf)); + } + + return ret; +} + +static int +qemuProcessDetectVcpuPIDs(struct qemud_driver *driver, + virDomainObjPtr vm) +{ + pid_t *cpupids = NULL; + int ncpupids; + qemuDomainObjPrivatePtr priv = vm->privateData; + + if (vm->def->virtType != VIR_DOMAIN_VIRT_KVM) { + priv->nvcpupids = 1; + if (VIR_ALLOC_N(priv->vcpupids, priv->nvcpupids) < 0) { + virReportOOMError(); + return -1; + } + priv->vcpupids[0] = vm->pid; + return 0; + } + + /* What follows is now all KVM specific */ + + qemuDomainObjEnterMonitorWithDriver(driver, vm); + if ((ncpupids = qemuMonitorGetCPUInfo(priv->mon, &cpupids)) < 0) { + qemuDomainObjExitMonitorWithDriver(driver, vm); + return -1; + } + qemuDomainObjExitMonitorWithDriver(driver, vm); + + /* Treat failure to get VCPU<->PID mapping as non-fatal */ + if (ncpupids == 0) + return 0; + + if (ncpupids != vm->def->vcpus) { + qemuReportError(VIR_ERR_INTERNAL_ERROR, + _("got wrong number of vCPU pids from QEMU monitor. " + "got %d, wanted %d"), + ncpupids, vm->def->vcpus); + VIR_FREE(cpupids); + return -1; + } + + priv->nvcpupids = ncpupids; + priv->vcpupids = cpupids; + return 0; +} + +/* + * To be run between fork/exec of QEMU only + */ +static int +qemuProcessInitCpuAffinity(virDomainObjPtr vm) +{ + int i, hostcpus, maxcpu = QEMUD_CPUMASK_LEN; + virNodeInfo nodeinfo; + unsigned char *cpumap; + int cpumaplen; + + DEBUG0("Setting CPU affinity"); + + if (nodeGetInfo(NULL, &nodeinfo) < 0) + return -1; + + /* setaffinity fails if you set bits for CPUs which + * aren't present, so we have to limit ourselves */ + hostcpus = VIR_NODEINFO_MAXCPUS(nodeinfo); + if (maxcpu > hostcpus) + maxcpu = hostcpus; + + cpumaplen = VIR_CPU_MAPLEN(maxcpu); + if (VIR_ALLOC_N(cpumap, cpumaplen) < 0) { + virReportOOMError(); + return -1; + } + + if (vm->def->cpumask) { + /* XXX why don't we keep 'cpumask' in the libvirt cpumap + * format to start with ?!?! */ + for (i = 0 ; i < maxcpu && i < vm->def->cpumasklen ; i++) + if (vm->def->cpumask[i]) + VIR_USE_CPU(cpumap, i); + } else { + /* You may think this is redundant, but we can't assume libvirtd + * itself is running on all pCPUs, so we need to explicitly set + * the spawned QEMU instance to all pCPUs if no map is given in + * its config file */ + for (i = 0 ; i < maxcpu ; i++) + VIR_USE_CPU(cpumap, i); + } + + /* We are presuming we are running between fork/exec of QEMU + * so use '0' to indicate our own process ID. 
No threads are + * running at this point + */ + if (virProcessInfoSetAffinity(0, /* Self */ + cpumap, cpumaplen, maxcpu) < 0) { + VIR_FREE(cpumap); + return -1; + } + VIR_FREE(cpumap); + + return 0; +} + + +static int +qemuProcessInitPasswords(virConnectPtr conn, + struct qemud_driver *driver, + virDomainObjPtr vm, + unsigned long long qemuCmdFlags) +{ + int ret = 0; + qemuDomainObjPrivatePtr priv = vm->privateData; + + if (vm->def->ngraphics == 1) { + if (vm->def->graphics[0]->type == VIR_DOMAIN_GRAPHICS_TYPE_VNC) { + ret = qemuDomainChangeGraphicsPasswords(driver, vm, + VIR_DOMAIN_GRAPHICS_TYPE_VNC, + &vm->def->graphics[0]->data.vnc.auth, + driver->vncPassword); + } else if (vm->def->graphics[0]->type == VIR_DOMAIN_GRAPHICS_TYPE_SPICE) { + ret = qemuDomainChangeGraphicsPasswords(driver, vm, + VIR_DOMAIN_GRAPHICS_TYPE_SPICE, + &vm->def->graphics[0]->data.spice.auth, + driver->spicePassword); + } + } + + if (ret < 0) + goto cleanup; + + if (qemuCmdFlags & QEMUD_CMD_FLAG_DEVICE) { + int i; + + for (i = 0 ; i < vm->def->ndisks ; i++) { + char *secret; + size_t secretLen; + + if (!vm->def->disks[i]->encryption || + !vm->def->disks[i]->src) + continue; + + if (qemuProcessGetVolumeQcowPassphrase(conn, + vm->def->disks[i], + &secret, &secretLen) < 0) + goto cleanup; + + qemuDomainObjEnterMonitorWithDriver(driver, vm); + ret = qemuMonitorSetDrivePassphrase(priv->mon, + vm->def->disks[i]->info.alias, + secret); + VIR_FREE(secret); + qemuDomainObjExitMonitorWithDriver(driver, vm); + if (ret < 0) + goto cleanup; + } + } + +cleanup: + return ret; +} + + +#define QEMU_PCI_VENDOR_INTEL 0x8086 +#define QEMU_PCI_VENDOR_LSI_LOGIC 0x1000 +#define QEMU_PCI_VENDOR_REDHAT 0x1af4 +#define QEMU_PCI_VENDOR_CIRRUS 0x1013 +#define QEMU_PCI_VENDOR_REALTEK 0x10ec +#define QEMU_PCI_VENDOR_AMD 0x1022 +#define QEMU_PCI_VENDOR_ENSONIQ 0x1274 +#define QEMU_PCI_VENDOR_VMWARE 0x15ad +#define QEMU_PCI_VENDOR_QEMU 0x1234 + +#define QEMU_PCI_PRODUCT_DISK_VIRTIO 0x1001 + +#define QEMU_PCI_PRODUCT_BALLOON_VIRTIO 0x1002 + +#define QEMU_PCI_PRODUCT_NIC_NE2K 0x8029 +#define QEMU_PCI_PRODUCT_NIC_PCNET 0x2000 +#define QEMU_PCI_PRODUCT_NIC_RTL8139 0x8139 +#define QEMU_PCI_PRODUCT_NIC_E1000 0x100E +#define QEMU_PCI_PRODUCT_NIC_VIRTIO 0x1000 + +#define QEMU_PCI_PRODUCT_VGA_CIRRUS 0x00b8 +#define QEMU_PCI_PRODUCT_VGA_VMWARE 0x0405 +#define QEMU_PCI_PRODUCT_VGA_STDVGA 0x1111 + +#define QEMU_PCI_PRODUCT_AUDIO_AC97 0x2415 +#define QEMU_PCI_PRODUCT_AUDIO_ES1370 0x5000 + +#define QEMU_PCI_PRODUCT_CONTROLLER_PIIX 0x7010 +#define QEMU_PCI_PRODUCT_CONTROLLER_LSI 0x0012 + +#define QEMU_PCI_PRODUCT_WATCHDOG_I63000ESB 0x25ab + +static int +qemuProcessAssignNextPCIAddress(virDomainDeviceInfo *info, + int vendor, + int product, + qemuMonitorPCIAddress *addrs, + int naddrs) +{ + int found = 0; + int i; + + VIR_DEBUG("Look for %x:%x out of %d", vendor, product, naddrs); + + for (i = 0 ; (i < naddrs) && !found; i++) { + VIR_DEBUG("Maybe %x:%x", addrs[i].vendor, addrs[i].product); + if (addrs[i].vendor == vendor && + addrs[i].product == product) { + VIR_DEBUG("Match %d", i); + found = 1; + break; + } + } + if (!found) { + return -1; + } + + /* Blank it out so this device isn't matched again */ + addrs[i].vendor = 0; + addrs[i].product = 0; + + if (info->type == VIR_DOMAIN_DEVICE_ADDRESS_TYPE_NONE) + info->type = VIR_DOMAIN_DEVICE_ADDRESS_TYPE_PCI; + + if (info->type == VIR_DOMAIN_DEVICE_ADDRESS_TYPE_PCI) { + info->addr.pci.domain = addrs[i].addr.domain; + info->addr.pci.bus = addrs[i].addr.bus; + info->addr.pci.slot = addrs[i].addr.slot; + 
info->addr.pci.function = addrs[i].addr.function; + } + + return 0; +} + +static int +qemuProcessGetPCIDiskVendorProduct(virDomainDiskDefPtr def, + unsigned *vendor, + unsigned *product) +{ + switch (def->bus) { + case VIR_DOMAIN_DISK_BUS_VIRTIO: + *vendor = QEMU_PCI_VENDOR_REDHAT; + *product = QEMU_PCI_PRODUCT_DISK_VIRTIO; + break; + + default: + return -1; + } + + return 0; +} + +static int +qemuProcessGetPCINetVendorProduct(virDomainNetDefPtr def, + unsigned *vendor, + unsigned *product) +{ + if (!def->model) + return -1; + + if (STREQ(def->model, "ne2k_pci")) { + *vendor = QEMU_PCI_VENDOR_REALTEK; + *product = QEMU_PCI_PRODUCT_NIC_NE2K; + } else if (STREQ(def->model, "pcnet")) { + *vendor = QEMU_PCI_VENDOR_AMD; + *product = QEMU_PCI_PRODUCT_NIC_PCNET; + } else if (STREQ(def->model, "rtl8139")) { + *vendor = QEMU_PCI_VENDOR_REALTEK; + *product = QEMU_PCI_PRODUCT_NIC_RTL8139; + } else if (STREQ(def->model, "e1000")) { + *vendor = QEMU_PCI_VENDOR_INTEL; + *product = QEMU_PCI_PRODUCT_NIC_E1000; + } else if (STREQ(def->model, "virtio")) { + *vendor = QEMU_PCI_VENDOR_REDHAT; + *product = QEMU_PCI_PRODUCT_NIC_VIRTIO; + } else { + VIR_INFO("Unexpected NIC model %s, cannot get PCI address", + def->model); + return -1; + } + return 0; +} + +static int +qemuProcessGetPCIControllerVendorProduct(virDomainControllerDefPtr def, + unsigned *vendor, + unsigned *product) +{ + switch (def->type) { + case VIR_DOMAIN_CONTROLLER_TYPE_SCSI: + *vendor = QEMU_PCI_VENDOR_LSI_LOGIC; + *product = QEMU_PCI_PRODUCT_CONTROLLER_LSI; + break; + + case VIR_DOMAIN_CONTROLLER_TYPE_FDC: + /* XXX we could put in the ISA bridge address, but + that's not technically the FDC's address */ + return -1; + + case VIR_DOMAIN_CONTROLLER_TYPE_IDE: + *vendor = QEMU_PCI_VENDOR_INTEL; + *product = QEMU_PCI_PRODUCT_CONTROLLER_PIIX; + break; + + default: + VIR_INFO("Unexpected controller type %s, cannot get PCI address", + virDomainControllerTypeToString(def->type)); + return -1; + } + + return 0; +} + +static int +qemuProcessGetPCIVideoVendorProduct(virDomainVideoDefPtr def, + unsigned *vendor, + unsigned *product) +{ + switch (def->type) { + case VIR_DOMAIN_VIDEO_TYPE_CIRRUS: + *vendor = QEMU_PCI_VENDOR_CIRRUS; + *product = QEMU_PCI_PRODUCT_VGA_CIRRUS; + break; + + case VIR_DOMAIN_VIDEO_TYPE_VGA: + *vendor = QEMU_PCI_VENDOR_QEMU; + *product = QEMU_PCI_PRODUCT_VGA_STDVGA; + break; + + case VIR_DOMAIN_VIDEO_TYPE_VMVGA: + *vendor = QEMU_PCI_VENDOR_VMWARE; + *product = QEMU_PCI_PRODUCT_VGA_VMWARE; + break; + + default: + return -1; + } + return 0; +} + +static int +qemuProcessGetPCISoundVendorProduct(virDomainSoundDefPtr def, + unsigned *vendor, + unsigned *product) +{ + switch (def->model) { + case VIR_DOMAIN_SOUND_MODEL_ES1370: + *vendor = QEMU_PCI_VENDOR_ENSONIQ; + *product = QEMU_PCI_PRODUCT_AUDIO_ES1370; + break; + + case VIR_DOMAIN_SOUND_MODEL_AC97: + *vendor = QEMU_PCI_VENDOR_INTEL; + *product = QEMU_PCI_PRODUCT_AUDIO_AC97; + break; + + default: + return -1; + } + + return 0; +} + +static int +qemuProcessGetPCIWatchdogVendorProduct(virDomainWatchdogDefPtr def, + unsigned *vendor, + unsigned *product) +{ + switch (def->model) { + case VIR_DOMAIN_WATCHDOG_MODEL_I6300ESB: + *vendor = QEMU_PCI_VENDOR_INTEL; + *product = QEMU_PCI_PRODUCT_WATCHDOG_I63000ESB; + break; + + default: + return -1; + } + + return 0; +} + + +static int +qemuProcessGetPCIMemballoonVendorProduct(virDomainMemballoonDefPtr def, + unsigned *vendor, + unsigned *product) +{ + switch (def->model) { + case VIR_DOMAIN_MEMBALLOON_MODEL_VIRTIO: + *vendor = 
QEMU_PCI_VENDOR_REDHAT; + *product = QEMU_PCI_PRODUCT_BALLOON_VIRTIO; + break; + + default: + return -1; + } + + return 0; +} + + +/* + * This entire method assumes that PCI devices in 'info pci' + * match ordering of devices specified on the command line + * wrt to devices of matching vendor+product + * + * XXXX this might not be a valid assumption if we assign + * some static addrs on CLI. Have to check that... + */ +static int +qemuProcessDetectPCIAddresses(virDomainObjPtr vm, + qemuMonitorPCIAddress *addrs, + int naddrs) +{ + unsigned int vendor = 0, product = 0; + int i; + + /* XXX should all these vendor/product IDs be kept in the + * actual device data structure instead ? + */ + + for (i = 0 ; i < vm->def->ndisks ; i++) { + if (qemuProcessGetPCIDiskVendorProduct(vm->def->disks[i], &vendor, &product) < 0) + continue; + + if (qemuProcessAssignNextPCIAddress(&(vm->def->disks[i]->info), + vendor, product, + addrs, naddrs) < 0) { + qemuReportError(VIR_ERR_INTERNAL_ERROR, + _("cannot find PCI address for VirtIO disk %s"), + vm->def->disks[i]->dst); + return -1; + } + } + + for (i = 0 ; i < vm->def->nnets ; i++) { + if (qemuProcessGetPCINetVendorProduct(vm->def->nets[i], &vendor, &product) < 0) + continue; + + if (qemuProcessAssignNextPCIAddress(&(vm->def->nets[i]->info), + vendor, product, + addrs, naddrs) < 0) { + qemuReportError(VIR_ERR_INTERNAL_ERROR, + _("cannot find PCI address for %s NIC"), + vm->def->nets[i]->model); + return -1; + } + } + + for (i = 0 ; i < vm->def->ncontrollers ; i++) { + if (qemuProcessGetPCIControllerVendorProduct(vm->def->controllers[i], &vendor, &product) < 0) + continue; + + if (qemuProcessAssignNextPCIAddress(&(vm->def->controllers[i]->info), + vendor, product, + addrs, naddrs) < 0) { + qemuReportError(VIR_ERR_INTERNAL_ERROR, + _("cannot find PCI address for controller %s"), + virDomainControllerTypeToString(vm->def->controllers[i]->type)); + return -1; + } + } + + for (i = 0 ; i < vm->def->nvideos ; i++) { + if (qemuProcessGetPCIVideoVendorProduct(vm->def->videos[i], &vendor, &product) < 0) + continue; + + if (qemuProcessAssignNextPCIAddress(&(vm->def->videos[i]->info), + vendor, product, + addrs, naddrs) < 0) { + qemuReportError(VIR_ERR_INTERNAL_ERROR, + _("cannot find PCI address for video adapter %s"), + virDomainVideoTypeToString(vm->def->videos[i]->type)); + return -1; + } + } + + for (i = 0 ; i < vm->def->nsounds ; i++) { + if (qemuProcessGetPCISoundVendorProduct(vm->def->sounds[i], &vendor, &product) < 0) + continue; + + if (qemuProcessAssignNextPCIAddress(&(vm->def->sounds[i]->info), + vendor, product, + addrs, naddrs) < 0) { + qemuReportError(VIR_ERR_INTERNAL_ERROR, + _("cannot find PCI address for sound adapter %s"), + virDomainSoundModelTypeToString(vm->def->sounds[i]->model)); + return -1; + } + } + + + if (vm->def->watchdog && + qemuProcessGetPCIWatchdogVendorProduct(vm->def->watchdog, &vendor, &product) == 0) { + if (qemuProcessAssignNextPCIAddress(&(vm->def->watchdog->info), + vendor, product, + addrs, naddrs) < 0) { + qemuReportError(VIR_ERR_INTERNAL_ERROR, + _("cannot find PCI address for watchdog %s"), + virDomainWatchdogModelTypeToString(vm->def->watchdog->model)); + return -1; + } + } + + if (vm->def->memballoon && + qemuProcessGetPCIMemballoonVendorProduct(vm->def->memballoon, &vendor, &product) == 0) { + if (qemuProcessAssignNextPCIAddress(&(vm->def->memballoon->info), + vendor, product, + addrs, naddrs) < 0) { + qemuReportError(VIR_ERR_INTERNAL_ERROR, + _("cannot find PCI address for balloon %s"), + 
virDomainMemballoonModelTypeToString(vm->def->memballoon->model)); + return -1; + } + } + + /* XXX console (virtio) */ + + + /* ... and now things we don't have in our xml */ + + /* XXX USB controller ? */ + + /* XXX what about other PCI devices (ie bridges) */ + + return 0; +} + +static int +qemuProcessInitPCIAddresses(struct qemud_driver *driver, + virDomainObjPtr vm) +{ + qemuDomainObjPrivatePtr priv = vm->privateData; + int naddrs; + int ret; + qemuMonitorPCIAddress *addrs = NULL; + + qemuDomainObjEnterMonitorWithDriver(driver, vm); + naddrs = qemuMonitorGetAllPCIAddresses(priv->mon, + &addrs); + qemuDomainObjExitMonitorWithDriver(driver, vm); + + ret = qemuProcessDetectPCIAddresses(vm, addrs, naddrs); + + VIR_FREE(addrs); + + return ret; +} + + +static int qemuProcessNextFreePort(struct qemud_driver *driver, + int startPort) +{ + int i; + + for (i = startPort ; i < QEMU_VNC_PORT_MAX; i++) { + int fd; + int reuse = 1; + struct sockaddr_in addr; + bool used = false; + + if (virBitmapGetBit(driver->reservedVNCPorts, + i - QEMU_VNC_PORT_MIN, &used) < 0) + VIR_DEBUG("virBitmapGetBit failed on bit %d", i - QEMU_VNC_PORT_MIN); + + if (used) + continue; + + addr.sin_family = AF_INET; + addr.sin_port = htons(i); + addr.sin_addr.s_addr = htonl(INADDR_ANY); + fd = socket(PF_INET, SOCK_STREAM, 0); + if (fd < 0) + return -1; + + if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, (void*)&reuse, sizeof(reuse)) < 0) { + VIR_FORCE_CLOSE(fd); + break; + } + + if (bind(fd, (struct sockaddr*)&addr, sizeof(addr)) == 0) { + /* Not in use, lets grab it */ + VIR_FORCE_CLOSE(fd); + /* Add port to bitmap of reserved ports */ + if (virBitmapSetBit(driver->reservedVNCPorts, + i - QEMU_VNC_PORT_MIN) < 0) { + VIR_DEBUG("virBitmapSetBit failed on bit %d", + i - QEMU_VNC_PORT_MIN); + } + return i; + } + VIR_FORCE_CLOSE(fd); + + if (errno == EADDRINUSE) { + /* In use, try next */ + continue; + } + /* Some other bad failure, get out.. 
*/ + break; + } + return -1; +} + + +static void +qemuProcessReturnPort(struct qemud_driver *driver, + int port) +{ + if (port < QEMU_VNC_PORT_MIN) + return; + + if (virBitmapClearBit(driver->reservedVNCPorts, + port - QEMU_VNC_PORT_MIN) < 0) + VIR_DEBUG("Could not mark port %d as unused", port); +} + + +static int +qemuProcessPrepareChardevDevice(virDomainDefPtr def ATTRIBUTE_UNUSED, + virDomainChrDefPtr dev, + void *opaque ATTRIBUTE_UNUSED) +{ + int fd; + if (dev->source.type != VIR_DOMAIN_CHR_TYPE_FILE) + return 0; + + if ((fd = open(dev->source.data.file.path, + O_CREAT | O_APPEND, S_IRUSR|S_IWUSR)) < 0) { + virReportSystemError(errno, + _("Unable to pre-create chardev file '%s'"), + dev->source.data.file.path); + return -1; + } + + VIR_FORCE_CLOSE(fd); + + return 0; +} + + +struct qemuProcessHookData { + virConnectPtr conn; + virDomainObjPtr vm; + struct qemud_driver *driver; +}; + +static int qemuProcessHook(void *data) +{ + struct qemuProcessHookData *h = data; + + /* This must take place before exec(), so that all QEMU + * memory allocation is on the correct NUMA node + */ + if (qemuAddToCgroup(h->driver, h->vm->def) < 0) + return -1; + + /* This must be done after cgroup placement to avoid resetting CPU + * affinity */ + if (qemuProcessInitCpuAffinity(h->vm) < 0) + return -1; + + if (virSecurityManagerSetProcessLabel(h->driver->securityManager, h->vm) < 0) + return -1; + + return 0; +} + + +int +qemuProcessPrepareMonitorChr(struct qemud_driver *driver, + virDomainChrSourceDefPtr monConfig, + const char *vm) +{ + monConfig->type = VIR_DOMAIN_CHR_TYPE_UNIX; + monConfig->data.nix.listen = true; + + if (virAsprintf(&monConfig->data.nix.path, "%s/%s.monitor", + driver->libDir, vm) < 0) { + virReportOOMError(); + return -1; + } + + return 0; +} + + +int qemuProcessStartCPUs(struct qemud_driver *driver, virDomainObjPtr vm, virConnectPtr conn) +{ + int ret; + qemuDomainObjPrivatePtr priv = vm->privateData; + + qemuDomainObjEnterMonitorWithDriver(driver, vm); + ret = qemuMonitorStartCPUs(priv->mon, conn); + if (ret == 0) { + vm->state = VIR_DOMAIN_RUNNING; + } + qemuDomainObjExitMonitorWithDriver(driver, vm); + + return ret; +} + + +int qemuProcessStopCPUs(struct qemud_driver *driver, virDomainObjPtr vm) +{ + int ret; + int oldState = vm->state; + qemuDomainObjPrivatePtr priv = vm->privateData; + + vm->state = VIR_DOMAIN_PAUSED; + qemuDomainObjEnterMonitorWithDriver(driver, vm); + ret = qemuMonitorStopCPUs(priv->mon); + qemuDomainObjExitMonitorWithDriver(driver, vm); + if (ret < 0) { + vm->state = oldState; + } + return ret; +} + + + +static int +qemuProcessFiltersInstantiate(virConnectPtr conn, + virDomainDefPtr def) +{ + int err = 0; + int i; + + if (!conn) + return 1; + + for (i = 0 ; i < def->nnets ; i++) { + virDomainNetDefPtr net = def->nets[i]; + if ((net->filter) && (net->ifname)) { + if (virDomainConfNWFilterInstantiate(conn, net)) { + err = 1; + break; + } + } + } + + return err; +} + +struct qemuProcessReconnectData { + virConnectPtr conn; + struct qemud_driver *driver; +}; +/* + * Open an existing VM's monitor, re-detect VCPU threads + * and re-reserve the security labels in use + */ +static void +qemuProcessReconnect(void *payload, const char *name ATTRIBUTE_UNUSED, void *opaque) +{ + virDomainObjPtr obj = payload; + struct qemuProcessReconnectData *data = opaque; + struct qemud_driver *driver = data->driver; + qemuDomainObjPrivatePtr priv; + unsigned long long qemuCmdFlags; + virConnectPtr conn = data->conn; + + virDomainObjLock(obj); + + VIR_DEBUG("Reconnect monitor to %p 
'%s'", obj, obj->def->name); + + priv = obj->privateData; + + /* Hold an extra reference because we can't allow 'vm' to be + * deleted if qemuConnectMonitor() failed */ + virDomainObjRef(obj); + + /* XXX check PID liveliness & EXE path */ + if (qemuConnectMonitor(driver, obj) < 0) + goto error; + + if (qemuUpdateActivePciHostdevs(driver, obj->def) < 0) { + goto error; + } + + /* XXX we should be persisting the original flags in the XML + * not re-detecting them, since the binary may have changed + * since launch time */ + if (qemuCapsExtractVersionInfo(obj->def->emulator, + NULL, + &qemuCmdFlags) >= 0 && + (qemuCmdFlags & QEMUD_CMD_FLAG_DEVICE)) { + priv->persistentAddrs = 1; + + if (!(priv->pciaddrs = qemuDomainPCIAddressSetCreate(obj->def)) || + qemuAssignDevicePCISlots(obj->def, priv->pciaddrs) < 0) + goto error; + } + + if (virSecurityManagerReserveLabel(driver->securityManager, obj) < 0) + goto error; + + if (qemuProcessFiltersInstantiate(conn, obj->def)) + goto error; + + if (obj->def->id >= driver->nextvmid) + driver->nextvmid = obj->def->id + 1; + + if (virDomainObjUnref(obj) > 0) + virDomainObjUnlock(obj); + return; + +error: + if (!virDomainObjIsActive(obj)) { + if (virDomainObjUnref(obj) > 0) + virDomainObjUnlock(obj); + return; + } + + if (virDomainObjUnref(obj) > 0) { + /* We can't get the monitor back, so must kill the VM + * to remove danger of it ending up running twice if + * user tries to start it again later */ + qemuProcessStop(driver, obj, 0); + if (!obj->persistent) + virDomainRemoveInactive(&driver->domains, obj); + else + virDomainObjUnlock(obj); + } +} + +/** + * qemuProcessReconnectAll + * + * Try to re-open the resources for live VMs that we care + * about. + */ +void +qemuProcessReconnectAll(virConnectPtr conn, struct qemud_driver *driver) +{ + struct qemuProcessReconnectData data = {conn, driver}; + virHashForEach(driver->domains.objs, qemuProcessReconnect, &data); +} + +int qemuProcessStart(virConnectPtr conn, + struct qemud_driver *driver, + virDomainObjPtr vm, + const char *migrateFrom, + bool start_paused, + int stdin_fd, + const char *stdin_path, + enum virVMOperationType vmop) +{ + int ret; + unsigned long long qemuCmdFlags; + off_t pos = -1; + char ebuf[1024]; + char *pidfile = NULL; + int logfile = -1; + char *timestamp; + qemuDomainObjPrivatePtr priv = vm->privateData; + virCommandPtr cmd = NULL; + struct qemuProcessHookData hookData; + + hookData.conn = conn; + hookData.vm = vm; + hookData.driver = driver; + + DEBUG0("Beginning VM startup process"); + + if (virDomainObjIsActive(vm)) { + qemuReportError(VIR_ERR_OPERATION_INVALID, + "%s", _("VM is already active")); + return -1; + } + + /* Do this upfront, so any part of the startup process can add + * runtime state to vm->def that won't be persisted. 
This lets us + * report implicit runtime defaults in the XML, like vnc listen/socket + */ + DEBUG0("Setting current domain def as transient"); + if (virDomainObjSetDefTransient(driver->caps, vm, true) < 0) + goto cleanup; + + /* Must be run before security labelling */ + DEBUG0("Preparing host devices"); + if (qemuPrepareHostDevices(driver, vm->def) < 0) + goto cleanup; + + DEBUG0("Preparing chr devices"); + if (virDomainChrDefForeach(vm->def, + true, + qemuProcessPrepareChardevDevice, + NULL) < 0) + goto cleanup; + + /* If you are using a SecurityDriver with dynamic labelling, + then generate a security label for isolation */ + DEBUG0("Generating domain security label (if required)"); + if (virSecurityManagerGenLabel(driver->securityManager, vm) < 0) { + qemuDomainSecurityLabelAudit(vm, false); + goto cleanup; + } + qemuDomainSecurityLabelAudit(vm, true); + + DEBUG0("Setting domain security labels (if required)"); + if (virSecurityManagerSetAllLabel(driver->securityManager, + vm, stdin_path) < 0) + goto cleanup; + + if (stdin_fd != -1) { + /* if there's an fd to migrate from, and it's a pipe, put the + * proper security label on it + */ + struct stat stdin_sb; + + DEBUG0("setting security label on pipe used for migration"); + + if (fstat(stdin_fd, &stdin_sb) < 0) { + virReportSystemError(errno, + _("cannot stat fd %d"), stdin_fd); + goto cleanup; + } + if (S_ISFIFO(stdin_sb.st_mode) && + virSecurityManagerSetFDLabel(driver->securityManager, vm, stdin_fd) < 0) + goto cleanup; + } + + /* Ensure no historical cgroup for this VM is lying around with bogus + * settings */ + DEBUG0("Ensuring no historical cgroup is lying around"); + qemuRemoveCgroup(driver, vm, 1); + + if (vm->def->ngraphics == 1) { + if (vm->def->graphics[0]->type == VIR_DOMAIN_GRAPHICS_TYPE_VNC && + !vm->def->graphics[0]->data.vnc.socket && + vm->def->graphics[0]->data.vnc.autoport) { + int port = qemuProcessNextFreePort(driver, QEMU_VNC_PORT_MIN); + if (port < 0) { + qemuReportError(VIR_ERR_INTERNAL_ERROR, + "%s", _("Unable to find an unused VNC port")); + goto cleanup; + } + vm->def->graphics[0]->data.vnc.port = port; + } else if (vm->def->graphics[0]->type == VIR_DOMAIN_GRAPHICS_TYPE_SPICE && + vm->def->graphics[0]->data.spice.autoport) { + int port = qemuProcessNextFreePort(driver, QEMU_VNC_PORT_MIN); + int tlsPort = -1; + if (port < 0) { + qemuReportError(VIR_ERR_INTERNAL_ERROR, + "%s", _("Unable to find an unused SPICE port")); + goto cleanup; + } + + if (driver->spiceTLS) { + tlsPort = qemuProcessNextFreePort(driver, port + 1); + if (tlsPort < 0) { + qemuReportError(VIR_ERR_INTERNAL_ERROR, + "%s", _("Unable to find an unused SPICE TLS port")); + qemuProcessReturnPort(driver, port); + goto cleanup; + } + } + + vm->def->graphics[0]->data.spice.port = port; + vm->def->graphics[0]->data.spice.tlsPort = tlsPort; + } + } + + if (virFileMakePath(driver->logDir) != 0) { + virReportSystemError(errno, + _("cannot create log directory %s"), + driver->logDir); + goto cleanup; + } + + DEBUG0("Creating domain log file"); + if ((logfile = qemuProcessLogFD(driver, vm->def->name, false)) < 0) + goto cleanup; + + DEBUG0("Determining emulator version"); + if (qemuCapsExtractVersionInfo(vm->def->emulator, + NULL, + &qemuCmdFlags) < 0) + goto cleanup; + + DEBUG0("Setting up domain cgroup (if required)"); + if (qemuSetupCgroup(driver, vm) < 0) + goto cleanup; + + if (VIR_ALLOC(priv->monConfig) < 0) { + virReportOOMError(); + goto cleanup; + } + + DEBUG0("Preparing monitor state"); + if (qemuProcessPrepareMonitorChr(driver, 
priv->monConfig, vm->def->name) < 0) + goto cleanup; + +#if HAVE_YAJL + if (qemuCmdFlags & QEMUD_CMD_FLAG_MONITOR_JSON) + priv->monJSON = 1; + else +#endif + priv->monJSON = 0; + + priv->monitor_warned = 0; + priv->gotShutdown = false; + + if ((ret = virFileDeletePid(driver->stateDir, vm->def->name)) != 0) { + virReportSystemError(ret, + _("Cannot remove stale PID file for %s"), + vm->def->name); + goto cleanup; + } + + if (!(pidfile = virFilePid(driver->stateDir, vm->def->name))) { + virReportSystemError(errno, + "%s", _("Failed to build pidfile path.")); + goto cleanup; + } + + /* + * Normally PCI addresses are assigned in the virDomainCreate + * or virDomainDefine methods. We might still need to assign + * some here to cope with the question of upgrades. Regardless + * we also need to populate the PCI address set cache for later + * use in hotplug + */ + if (qemuCmdFlags & QEMUD_CMD_FLAG_DEVICE) { + DEBUG0("Assigning domain PCI addresses"); + /* Populate cache with current addresses */ + if (priv->pciaddrs) { + qemuDomainPCIAddressSetFree(priv->pciaddrs); + priv->pciaddrs = NULL; + } + if (!(priv->pciaddrs = qemuDomainPCIAddressSetCreate(vm->def))) + goto cleanup; + + + /* Assign any remaining addresses */ + if (qemuAssignDevicePCISlots(vm->def, priv->pciaddrs) < 0) + goto cleanup; + + priv->persistentAddrs = 1; + } else { + priv->persistentAddrs = 0; + } + + DEBUG0("Building emulator command line"); + vm->def->id = driver->nextvmid++; + if (!(cmd = qemuBuildCommandLine(conn, driver, vm->def, priv->monConfig, + priv->monJSON != 0, qemuCmdFlags, + migrateFrom, stdin_fd, + vm->current_snapshot, vmop))) + goto cleanup; + +#if 0 + /* XXX */ + if (qemuDomainSnapshotSetCurrentInactive(vm, driver->snapshotDir) < 0) + goto cleanup; +#endif + + /* now that we know it is about to start, call the hook if present */ + if (virHookPresent(VIR_HOOK_DRIVER_QEMU)) { + char *xml = virDomainDefFormat(vm->def, 0); + int hookret; + + hookret = virHookCall(VIR_HOOK_DRIVER_QEMU, vm->def->name, + VIR_HOOK_QEMU_OP_START, VIR_HOOK_SUBOP_BEGIN, NULL, xml); + VIR_FREE(xml); + + /* + * If the script raised an error, abort the launch + */ + if (hookret < 0) + goto cleanup; + } + + if ((timestamp = virTimestamp()) == NULL) { + virReportOOMError(); + goto cleanup; + } else { + if (safewrite(logfile, timestamp, strlen(timestamp)) < 0 || + safewrite(logfile, START_POSTFIX, strlen(START_POSTFIX)) < 0) { + VIR_WARN("Unable to write timestamp to logfile: %s", + virStrerror(errno, ebuf, sizeof ebuf)); + } + + VIR_FREE(timestamp); + } + + virCommandWriteArgLog(cmd, logfile); + + if ((pos = lseek(logfile, 0, SEEK_END)) < 0) + VIR_WARN("Unable to seek to end of logfile: %s", + virStrerror(errno, ebuf, sizeof ebuf)); + + VIR_DEBUG("Clear emulator capabilities: %d", + driver->clearEmulatorCapabilities); + if (driver->clearEmulatorCapabilities) + virCommandClearCaps(cmd); + + virCommandSetPreExecHook(cmd, qemuProcessHook, &hookData); + + virCommandSetOutputFD(cmd, &logfile); + virCommandSetErrorFD(cmd, &logfile); + virCommandNonblockingFDs(cmd); + virCommandSetPidFile(cmd, pidfile); + virCommandDaemonize(cmd); + + ret = virCommandRun(cmd, NULL); + VIR_FREE(pidfile); + + /* wait for qemu process to show up */ + if (ret == 0) { + if (virFileReadPid(driver->stateDir, vm->def->name, &vm->pid)) { + qemuReportError(VIR_ERR_INTERNAL_ERROR, + _("Domain %s didn't show up"), vm->def->name); + ret = -1; + } +#if 0 + } else if (ret == -2) { + /* + * XXX this is bogus. 
It isn't safe to set vm->pid = child + because the child no longer exists. + */ + + /* The virExec process that launches the daemon failed. Depending on + * when it failed (we can't determine for sure), there may be + * extra info in the domain log (if the hook failed for example). + * + * Pretend like things succeeded, and let 'WaitForMonitor' report + * the log contents for us. + */ + vm->pid = child; + ret = 0; +#endif + } + + if (migrateFrom) + start_paused = true; + vm->state = start_paused ? VIR_DOMAIN_PAUSED : VIR_DOMAIN_RUNNING; + + if (ret == -1) /* The VM failed to start; tear filters before taps */ + virDomainConfVMNWFilterTeardown(vm); + + if (ret == -1) /* The VM failed to start */ + goto cleanup; + + DEBUG0("Waiting for monitor to show up"); + if (qemuProcessWaitForMonitor(driver, vm, pos) < 0) + goto cleanup; + + DEBUG0("Detecting VCPU PIDs"); + if (qemuProcessDetectVcpuPIDs(driver, vm) < 0) + goto cleanup; + + DEBUG0("Setting any required VM passwords"); + if (qemuProcessInitPasswords(conn, driver, vm, qemuCmdFlags) < 0) + goto cleanup; + + /* If we have -device, then addresses are assigned explicitly. + * If not, then we have to detect dynamic ones here */ + if (!(qemuCmdFlags & QEMUD_CMD_FLAG_DEVICE)) { + DEBUG0("Determining domain device PCI addresses"); + if (qemuProcessInitPCIAddresses(driver, vm) < 0) + goto cleanup; + } + + DEBUG0("Setting initial memory amount"); + qemuDomainObjEnterMonitorWithDriver(driver, vm); + if (qemuMonitorSetBalloon(priv->mon, vm->def->mem.cur_balloon) < 0) { + qemuDomainObjExitMonitorWithDriver(driver, vm); + goto cleanup; + } + qemuDomainObjExitMonitorWithDriver(driver, vm); + + if (!start_paused) { + DEBUG0("Starting domain CPUs"); + /* Allow the CPUS to start executing */ + if (qemuProcessStartCPUs(driver, vm, conn) < 0) { + if (virGetLastError() == NULL) + qemuReportError(VIR_ERR_INTERNAL_ERROR, + "%s", _("resume operation failed")); + goto cleanup; + } + } + + + DEBUG0("Writing domain status to disk"); + if (virDomainSaveStatus(driver->caps, driver->stateDir, vm) < 0) + goto cleanup; + + virCommandFree(cmd); + VIR_FORCE_CLOSE(logfile); + + return 0; + +cleanup: + /* We jump here if we failed to start the VM for any reason, or + * if we failed to initialize the now running VM. Kill it off and + * pretend we never started it */ + virCommandFree(cmd); + VIR_FORCE_CLOSE(logfile); + qemuProcessStop(driver, vm, 0); + + return -1; +} + + +void qemuProcessStop(struct qemud_driver *driver, + virDomainObjPtr vm, + int migrated) +{ + int ret; + int retries = 0; + qemuDomainObjPrivatePtr priv = vm->privateData; + virErrorPtr orig_err; + virDomainDefPtr def; + int i; + int logfile = -1; + char *timestamp; + char ebuf[1024]; + + VIR_DEBUG("Shutting down VM '%s' pid=%d migrated=%d", + vm->def->name, vm->pid, migrated); + + if (!virDomainObjIsActive(vm)) { + VIR_DEBUG("VM '%s' not active", vm->def->name); + return; + } + + if ((logfile = qemuProcessLogFD(driver, vm->def->name, true)) < 0) { + /* To not break the normal domain shutdown process, skip the + * timestamp log writing if we fail to open the log file. 
*/ + VIR_WARN("Unable to open logfile: %s", + virStrerror(errno, ebuf, sizeof ebuf)); + } else { + if ((timestamp = virTimestamp()) == NULL) { + virReportOOMError(); + } else { + if (safewrite(logfile, timestamp, strlen(timestamp)) < 0 || + safewrite(logfile, SHUTDOWN_POSTFIX, + strlen(SHUTDOWN_POSTFIX)) < 0) { + VIR_WARN("Unable to write timestamp to logfile: %s", + virStrerror(errno, ebuf, sizeof ebuf)); + } + + VIR_FREE(timestamp); + } + + if (VIR_CLOSE(logfile) < 0) + VIR_WARN("Unable to close logfile: %s", + virStrerror(errno, ebuf, sizeof ebuf)); + } + + /* This method is routinely used in clean up paths. Disable error + * reporting so we don't squash a legit error. */ + orig_err = virSaveLastError(); + + virDomainConfVMNWFilterTeardown(vm); + + if (driver->macFilter) { + def = vm->def; + for (i = 0 ; i < def->nnets ; i++) { + virDomainNetDefPtr net = def->nets[i]; + if (net->ifname == NULL) + continue; + if ((errno = networkDisallowMacOnPort(driver, net->ifname, + net->mac))) { + virReportSystemError(errno, + _("failed to remove ebtables rule to allow MAC address on '%s'"), + net->ifname); + } + } + } + + /* This will safely handle a non-running guest with pid=0 or pid=-1*/ + if (virKillProcess(vm->pid, 0) == 0 && + virKillProcess(vm->pid, SIGTERM) < 0) + virReportSystemError(errno, + _("Failed to send SIGTERM to %s (%d)"), + vm->def->name, vm->pid); + + if (priv->mon) + qemuMonitorClose(priv->mon); + + if (priv->monConfig) { + if (priv->monConfig->type == VIR_DOMAIN_CHR_TYPE_UNIX) + unlink(priv->monConfig->data.nix.path); + virDomainChrSourceDefFree(priv->monConfig); + priv->monConfig = NULL; + } + + /* shut it off for sure */ + virKillProcess(vm->pid, SIGKILL); + + /* now that we know it's stopped call the hook if present */ + if (virHookPresent(VIR_HOOK_DRIVER_QEMU)) { + char *xml = virDomainDefFormat(vm->def, 0); + + /* we can't stop the operation even if the script raised an error */ + virHookCall(VIR_HOOK_DRIVER_QEMU, vm->def->name, + VIR_HOOK_QEMU_OP_STOPPED, VIR_HOOK_SUBOP_END, NULL, xml); + VIR_FREE(xml); + } + + /* Reset Security Labels */ + virSecurityManagerRestoreAllLabel(driver->securityManager, + vm, migrated); + virSecurityManagerReleaseLabel(driver->securityManager, vm); + + /* Clear out dynamically assigned labels */ + if (vm->def->seclabel.type == VIR_DOMAIN_SECLABEL_DYNAMIC) { + VIR_FREE(vm->def->seclabel.model); + VIR_FREE(vm->def->seclabel.label); + VIR_FREE(vm->def->seclabel.imagelabel); + } + + virDomainDefClearDeviceAliases(vm->def); + if (!priv->persistentAddrs) { + virDomainDefClearPCIAddresses(vm->def); + qemuDomainPCIAddressSetFree(priv->pciaddrs); + priv->pciaddrs = NULL; + } + + qemuDomainReAttachHostDevices(driver, vm->def); + +#if WITH_MACVTAP + def = vm->def; + for (i = 0; i < def->nnets; i++) { + virDomainNetDefPtr net = def->nets[i]; + if (net->type == VIR_DOMAIN_NET_TYPE_DIRECT) { + delMacvtap(net->ifname, net->mac, net->data.direct.linkdev, + &net->data.direct.virtPortProfile); + VIR_FREE(net->ifname); + } + } +#endif + +retry: + if ((ret = qemuRemoveCgroup(driver, vm, 0)) < 0) { + if (ret == -EBUSY && (retries++ < 5)) { + usleep(200*1000); + goto retry; + } + VIR_WARN("Failed to remove cgroup for %s", + vm->def->name); + } + + qemuProcessRemoveDomainStatus(driver, vm); + + /* Remove VNC port from port reservation bitmap, but only if it was + reserved by the driver (autoport=yes) + */ + if ((vm->def->ngraphics == 1) && + vm->def->graphics[0]->type == VIR_DOMAIN_GRAPHICS_TYPE_VNC && + vm->def->graphics[0]->data.vnc.autoport) { + 
qemuProcessReturnPort(driver, vm->def->graphics[0]->data.vnc.port); + } + if ((vm->def->ngraphics == 1) && + vm->def->graphics[0]->type == VIR_DOMAIN_GRAPHICS_TYPE_SPICE && + vm->def->graphics[0]->data.spice.autoport) { + qemuProcessReturnPort(driver, vm->def->graphics[0]->data.spice.port); + qemuProcessReturnPort(driver, vm->def->graphics[0]->data.spice.tlsPort); + } + + vm->pid = -1; + vm->def->id = -1; + vm->state = VIR_DOMAIN_SHUTOFF; + VIR_FREE(priv->vcpupids); + priv->nvcpupids = 0; + + if (vm->newDef) { + virDomainDefFree(vm->def); + vm->def = vm->newDef; + vm->def->id = -1; + vm->newDef = NULL; + } + + if (orig_err) { + virSetError(orig_err); + virFreeError(orig_err); + } +} diff --git a/src/qemu/qemu_process.h b/src/qemu/qemu_process.h new file mode 100644 index 0000000..f1ab599 --- /dev/null +++ b/src/qemu/qemu_process.h @@ -0,0 +1,52 @@ +/* + * qemu_process.c: QEMU process management + * + * Copyright (C) 2006-2011 Red Hat, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#ifndef __QEMU_PROCESS_H__ +# define __QEMU_PROCESS_H__ + +# include "qemu_conf.h" + +int qemuProcessPrepareMonitorChr(struct qemud_driver *driver, + virDomainChrSourceDefPtr monConfig, + const char *vm); + +int qemuProcessStartCPUs(struct qemud_driver *driver, virDomainObjPtr vm, virConnectPtr conn); +int qemuProcessStopCPUs(struct qemud_driver *driver, virDomainObjPtr vm); + +void qemuProcessAutostartAll(struct qemud_driver *driver); +void qemuProcessReconnectAll(virConnectPtr conn, struct qemud_driver *driver); + +int qemuProcessAssignPCIAddresses(virDomainDefPtr def); + +int qemuProcessStart(virConnectPtr conn, + struct qemud_driver *driver, + virDomainObjPtr vm, + const char *migrateFrom, + bool start_paused, + int stdin_fd, + const char *stdin_path, + enum virVMOperationType vmop); + +void qemuProcessStop(struct qemud_driver *driver, + virDomainObjPtr vm, + int migrated); + +#endif /* __QEMU_PROCESS_H__ */ -- 1.7.4
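For readers tracking the refactor, here is a minimal caller sketch of the relocated qemu_process.h API. This is an editorial illustration, not part of the patch: exampleStartDomain is a hypothetical helper, it assumes the driver lock is held and the virDomainObjPtr has already been looked up (as the real entry points in qemu_driver.c do), and it assumes the pre-existing VIR_VM_OP_CREATE value of the virVMOperationType enum.

#include "qemu_process.h"

/* Hypothetical sketch, not from the patch: start and later stop a guest
 * via the new qemuProcessStart/qemuProcessStop entry points. */
static int
exampleStartDomain(virConnectPtr conn,
                   struct qemud_driver *driver,
                   virDomainObjPtr vm)
{
    /* migrateFrom == NULL means a plain start (no -incoming argument);
     * passing a "tcp:..." or "unix:..." string instead makes the new
     * guest wait, paused, for incoming migration data. */
    if (qemuProcessStart(conn, driver, vm,
                         NULL,     /* migrateFrom */
                         false,    /* start_paused */
                         -1, NULL, /* stdin_fd / stdin_path (used by restore) */
                         VIR_VM_OP_CREATE) < 0)
        return -1; /* on failure qemuProcessStart has already torn down
                    * the process via its own cleanup path */

    /* ... the domain runs; when it must go away again: */
    qemuProcessStop(driver, vm, 0 /* migrated=0: fully restore security
                                   * labels, release ports, remove cgroup */);
    return 0;
}

Note how the migrated flag of qemuProcessStop flows through to virSecurityManagerRestoreAllLabel, so a source host that has just migrated a guest away can skip relabelling shared disks.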

On 02/09/2011 09:58 AM, Daniel P. Berrange wrote:
Move the qemudStartVMDaemon and qemudShutdownVMDaemon methods into a separate file, renaming them to qemuProcessStart, qemuProcessStop. All helper methods called by these are also moved & renamed to match
* src/Makefile.am: Add qemu_process.c/.h * src/qemu/qemu_command.c: Add emuDomainAssignPCIAddresses
s/ emu/ qemu/
* src/qemu/qemu_command.h: Add VNC port min/max * src/qemu/qemu_domain.c, src/qemu/qemu_domain.h: Add domain event queue helpers * src/qemu/qemu_driver.c, src/qemu/qemu_driver.h: Remove all QEMU process startup/shutdown functions * src/qemu/qemu_process.c, src/qemu/qemu_process.h: Add all QEMU process startup/shutdown functions --- po/POTFILES.in | 1 + src/Makefile.am | 1 + src/qemu/qemu_command.c | 29 + src/qemu/qemu_command.h | 5 + src/qemu/qemu_domain.c | 56 + src/qemu/qemu_domain.h | 11 + src/qemu/qemu_driver.c | 3535 ++++++++--------------------------------------- src/qemu/qemu_driver.h | 31 +- src/qemu/qemu_process.c | 2417 ++++++++++++++++++++++++++++++++ src/qemu/qemu_process.h | 52 + 10 files changed, 3121 insertions(+), 3017 deletions(-) create mode 100644 src/qemu/qemu_process.c create mode 100644 src/qemu/qemu_process.h
ACK; I reviewed an interdiff between this and your v1 [1], and you have correctly picked up all the changes that went into qemu_driver.c since that posting, as well as resolved all my concerns from that post, except for the spelling nit in the commit message.

[1] https://www.redhat.com/archives/libvir-list/2011-January/msg01255.html

--
Eric Blake eblake@redhat.com +1-801-349-2682
Libvirt virtualization library http://libvirt.org

On 02/09/2011 12:43 PM, Eric Blake wrote:
On 02/09/2011 09:58 AM, Daniel P. Berrange wrote:
Move the qemudStartVMDaemon and qemudShutdownVMDaemon methods into a separate file, renaming them to qemuProcessStart, qemuProcessStop. All helper methods called by these are also moved & renamed to match
* src/Makefile.am: Add qemu_process.c/.h * src/qemu/qemu_command.c: Add emuDomainAssignPCIAddresses
s/ emu/ qemu/
ACK; I reviewed an interdiff between this and your v1 [1], and you have correctly picked up all the changes that went into qemu_driver.c since that posting, as well as resolved all my concerns from that post, except for the spelling nit in the commit message.
Also, be sure you don't lose commit eacb3bb02 when rebasing (I almost did when testing out your patch today).

--
Eric Blake eblake@redhat.com +1-801-349-2682
Libvirt virtualization library http://libvirt.org

The introduction of the v3 migration protocol, along with support for migration cookies, will significantly expand the size of the migration code. Move it all to a separate file to make it more manageable The functions are not moved 100%. The API entry points remain in the main QEMU driver, but once the public virDomainPtr is resolved to the internal virDomainObjPtr, all following code is moved. This will allow the new v3 API entry points to call into the same shared internal migration functions * src/qemu/qemu_domain.c, src/qemu/qemu_domain.h: Add qemuDomainFormatXML helper method * src/qemu/qemu_driver.c: Remove all migration code * src/qemu/qemu_migration.c, src/qemu/qemu_migration.h: Add all migration code. --- po/POTFILES.in | 1 + src/Makefile.am | 3 +- src/qemu/qemu_domain.c | 39 ++ src/qemu/qemu_domain.h | 4 + src/qemu/qemu_driver.c | 1297 ++------------------------------------------- src/qemu/qemu_migration.c | 1295 ++++++++++++++++++++++++++++++++++++++++++++ src/qemu/qemu_migration.h | 63 +++ 7 files changed, 1445 insertions(+), 1257 deletions(-) create mode 100644 src/qemu/qemu_migration.c create mode 100644 src/qemu/qemu_migration.h diff --git a/po/POTFILES.in b/po/POTFILES.in index 343fe5d..2256cb2 100644 --- a/po/POTFILES.in +++ b/po/POTFILES.in @@ -58,6 +58,7 @@ src/qemu/qemu_domain.c src/qemu/qemu_driver.c src/qemu/qemu_hostdev.c src/qemu/qemu_hotplug.c +src/qemu/qemu_migration.c src/qemu/qemu_monitor.c src/qemu/qemu_monitor_json.c src/qemu/qemu_monitor_text.c diff --git a/src/Makefile.am b/src/Makefile.am index 15a4e8c..36e08a0 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -281,7 +281,8 @@ QEMU_DRIVER_SOURCES = \ qemu/qemu_hostdev.c qemu/qemu_hostdev.h \ qemu/qemu_hotplug.c qemu/qemu_hotplug.h \ qemu/qemu_conf.c qemu/qemu_conf.h \ - qemu/qemu_process.c qemu/qemu_process.h \ + qemu/qemu_process.c qemu/qemu_process.h \ + qemu/qemu_migration.c qemu/qemu_migration.h \ qemu/qemu_monitor.c qemu/qemu_monitor.h \ qemu/qemu_monitor_text.c \ qemu/qemu_monitor_text.h \ diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c index e3163ab..8a2b9cc 100644 --- a/src/qemu/qemu_domain.c +++ b/src/qemu/qemu_domain.c @@ -30,6 +30,7 @@ #include "virterror_internal.h" #include "c-ctype.h" #include "event.h" +#include "cpu/cpu.h" #include <sys/time.h> @@ -653,3 +654,41 @@ void qemuDomainObjExitRemoteWithDriver(struct qemud_driver *driver, virDomainObjLock(obj); virDomainObjUnref(obj); } + + +char *qemuDomainFormatXML(struct qemud_driver *driver, + virDomainObjPtr vm, + int flags) +{ + char *ret = NULL; + virCPUDefPtr cpu = NULL; + virDomainDefPtr def; + virCPUDefPtr def_cpu; + + if ((flags & VIR_DOMAIN_XML_INACTIVE) && vm->newDef) + def = vm->newDef; + else + def = vm->def; + def_cpu = def->cpu; + + /* Update guest CPU requirements according to host CPU */ + if ((flags & VIR_DOMAIN_XML_UPDATE_CPU) && def_cpu && def_cpu->model) { + if (!driver->caps || !driver->caps->host.cpu) { + qemuReportError(VIR_ERR_OPERATION_FAILED, + "%s", _("cannot get host CPU capabilities")); + goto cleanup; + } + + if (!(cpu = virCPUDefCopy(def_cpu)) + || cpuUpdate(cpu, driver->caps->host.cpu)) + goto cleanup; + def->cpu = cpu; + } + + ret = virDomainDefFormat(def, flags); + +cleanup: + def->cpu = def_cpu; + virCPUDefFree(cpu); + return ret; +} diff --git a/src/qemu/qemu_domain.h b/src/qemu/qemu_domain.h index 4333a78..ebb2050 100644 --- a/src/qemu/qemu_domain.h +++ b/src/qemu/qemu_domain.h @@ -107,4 +107,8 @@ void qemuDomainObjEnterRemoteWithDriver(struct qemud_driver *driver, void 
qemuDomainObjExitRemoteWithDriver(struct qemud_driver *driver, virDomainObjPtr obj); +char *qemuDomainFormatXML(struct qemud_driver *driver, + virDomainObjPtr vm, + int flags); + #endif /* __QEMU_DOMAIN_H__ */ diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c index 9cc6e89..21e88f8 100644 --- a/src/qemu/qemu_driver.c +++ b/src/qemu/qemu_driver.c @@ -57,6 +57,7 @@ #include "qemu_bridge_filter.h" #include "qemu_audit.h" #include "qemu_process.h" +#include "qemu_migration.h" #include "virterror_internal.h" #include "logging.h" @@ -1691,176 +1692,6 @@ cleanup: } -/** qemuDomainMigrateOffline: - * Pause domain for non-live migration. - */ -static int -qemuDomainMigrateOffline(struct qemud_driver *driver, - virDomainObjPtr vm) -{ - int ret; - - ret = qemuProcessStopCPUs(driver, vm); - if (ret == 0) { - virDomainEventPtr event; - - event = virDomainEventNewFromObj(vm, - VIR_DOMAIN_EVENT_SUSPENDED, - VIR_DOMAIN_EVENT_SUSPENDED_MIGRATED); - if (event) - qemuDomainEventQueue(driver, event); - } - - return ret; -} - - -static int -qemuDomainWaitForMigrationComplete(struct qemud_driver *driver, virDomainObjPtr vm) -{ - int ret = -1; - int status; - unsigned long long memProcessed; - unsigned long long memRemaining; - unsigned long long memTotal; - qemuDomainObjPrivatePtr priv = vm->privateData; - - priv->jobInfo.type = VIR_DOMAIN_JOB_UNBOUNDED; - - while (priv->jobInfo.type == VIR_DOMAIN_JOB_UNBOUNDED) { - /* Poll every 50ms for progress & to allow cancellation */ - struct timespec ts = { .tv_sec = 0, .tv_nsec = 50 * 1000 * 1000ull }; - struct timeval now; - int rc; - const char *job; - - switch (priv->jobActive) { - case QEMU_JOB_MIGRATION_OUT: - job = _("migration job"); - break; - case QEMU_JOB_SAVE: - job = _("domain save job"); - break; - case QEMU_JOB_DUMP: - job = _("domain core dump job"); - break; - default: - job = _("job"); - } - - - if (!virDomainObjIsActive(vm)) { - qemuReportError(VIR_ERR_INTERNAL_ERROR, _("%s: %s"), - job, _("guest unexpectedly quit")); - goto cleanup; - } - - if (priv->jobSignals & QEMU_JOB_SIGNAL_CANCEL) { - priv->jobSignals ^= QEMU_JOB_SIGNAL_CANCEL; - VIR_DEBUG0("Cancelling job at client request"); - qemuDomainObjEnterMonitorWithDriver(driver, vm); - rc = qemuMonitorMigrateCancel(priv->mon); - qemuDomainObjExitMonitorWithDriver(driver, vm); - if (rc < 0) { - VIR_WARN0("Unable to cancel job"); - } - } else if (priv->jobSignals & QEMU_JOB_SIGNAL_SUSPEND) { - priv->jobSignals ^= QEMU_JOB_SIGNAL_SUSPEND; - VIR_DEBUG0("Pausing domain for non-live migration"); - if (qemuDomainMigrateOffline(driver, vm) < 0) - VIR_WARN0("Unable to pause domain"); - } else if (priv->jobSignals & QEMU_JOB_SIGNAL_MIGRATE_DOWNTIME) { - unsigned long long ms = priv->jobSignalsData.migrateDowntime; - - priv->jobSignals ^= QEMU_JOB_SIGNAL_MIGRATE_DOWNTIME; - priv->jobSignalsData.migrateDowntime = 0; - VIR_DEBUG("Setting migration downtime to %llums", ms); - qemuDomainObjEnterMonitorWithDriver(driver, vm); - rc = qemuMonitorSetMigrationDowntime(priv->mon, ms); - qemuDomainObjExitMonitorWithDriver(driver, vm); - if (rc < 0) - VIR_WARN0("Unable to set migration downtime"); - } - - /* Repeat check because the job signals might have caused - * guest to die - */ - if (!virDomainObjIsActive(vm)) { - qemuReportError(VIR_ERR_INTERNAL_ERROR, _("%s: %s"), - job, _("guest unexpectedly quit")); - goto cleanup; - } - - qemuDomainObjEnterMonitorWithDriver(driver, vm); - rc = qemuMonitorGetMigrationStatus(priv->mon, - &status, - &memProcessed, - &memRemaining, - &memTotal); - 
qemuDomainObjExitMonitorWithDriver(driver, vm); - - if (rc < 0) { - priv->jobInfo.type = VIR_DOMAIN_JOB_FAILED; - goto cleanup; - } - - if (gettimeofday(&now, NULL) < 0) { - priv->jobInfo.type = VIR_DOMAIN_JOB_FAILED; - virReportSystemError(errno, "%s", - _("cannot get time of day")); - goto cleanup; - } - priv->jobInfo.timeElapsed = timeval_to_ms(now) - priv->jobStart; - - switch (status) { - case QEMU_MONITOR_MIGRATION_STATUS_INACTIVE: - priv->jobInfo.type = VIR_DOMAIN_JOB_NONE; - qemuReportError(VIR_ERR_OPERATION_FAILED, - _("%s: %s"), job, _("is not active")); - break; - - case QEMU_MONITOR_MIGRATION_STATUS_ACTIVE: - priv->jobInfo.dataTotal = memTotal; - priv->jobInfo.dataRemaining = memRemaining; - priv->jobInfo.dataProcessed = memProcessed; - - priv->jobInfo.memTotal = memTotal; - priv->jobInfo.memRemaining = memRemaining; - priv->jobInfo.memProcessed = memProcessed; - break; - - case QEMU_MONITOR_MIGRATION_STATUS_COMPLETED: - priv->jobInfo.type = VIR_DOMAIN_JOB_COMPLETED; - ret = 0; - break; - - case QEMU_MONITOR_MIGRATION_STATUS_ERROR: - priv->jobInfo.type = VIR_DOMAIN_JOB_FAILED; - qemuReportError(VIR_ERR_OPERATION_FAILED, - _("%s: %s"), job, _("unexpectedly failed")); - break; - - case QEMU_MONITOR_MIGRATION_STATUS_CANCELLED: - priv->jobInfo.type = VIR_DOMAIN_JOB_CANCELLED; - qemuReportError(VIR_ERR_OPERATION_FAILED, - _("%s: %s"), job, _("canceled by client")); - break; - } - - virDomainObjUnlock(vm); - qemuDriverUnlock(driver); - - nanosleep(&ts, NULL); - - qemuDriverLock(driver); - virDomainObjLock(vm); - } - -cleanup: - return ret; -} - - #define QEMUD_SAVE_MAGIC "LibvirtQemudSave" #define QEMUD_SAVE_VERSION 2 @@ -2161,7 +1992,7 @@ static int qemudDomainSaveFlag(struct qemud_driver *driver, virDomainPtr dom, if (rc < 0) goto endjob; - rc = qemuDomainWaitForMigrationComplete(driver, vm); + rc = qemuMigrationWaitForCompletion(driver, vm); if (rc < 0) goto endjob; @@ -2469,7 +2300,7 @@ static int doCoreDump(struct qemud_driver *driver, if (ret < 0) goto cleanup; - ret = qemuDomainWaitForMigrationComplete(driver, vm); + ret = qemuMigrationWaitForCompletion(driver, vm); if (ret < 0) goto cleanup; @@ -3605,44 +3436,6 @@ cleanup: } -static char *qemudVMDumpXML(struct qemud_driver *driver, - virDomainObjPtr vm, - int flags) -{ - char *ret = NULL; - virCPUDefPtr cpu = NULL; - virDomainDefPtr def; - virCPUDefPtr def_cpu; - - if ((flags & VIR_DOMAIN_XML_INACTIVE) && vm->newDef) - def = vm->newDef; - else - def = vm->def; - def_cpu = def->cpu; - - /* Update guest CPU requirements according to host CPU */ - if ((flags & VIR_DOMAIN_XML_UPDATE_CPU) && def_cpu && def_cpu->model) { - if (!driver->caps || !driver->caps->host.cpu) { - qemuReportError(VIR_ERR_OPERATION_FAILED, - "%s", _("cannot get host CPU capabilities")); - goto cleanup; - } - - if (!(cpu = virCPUDefCopy(def_cpu)) - || cpuUpdate(cpu, driver->caps->host.cpu)) - goto cleanup; - def->cpu = cpu; - } - - ret = virDomainDefFormat(def, flags); - -cleanup: - def->cpu = def_cpu; - virCPUDefFree(cpu); - return ret; -} - - static char *qemudDomainDumpXML(virDomainPtr dom, int flags) { struct qemud_driver *driver = dom->conn->privateData; @@ -3688,7 +3481,7 @@ static char *qemudDomainDumpXML(virDomainPtr dom, } } - ret = qemudVMDumpXML(driver, vm, flags); + ret = qemuDomainFormatXML(driver, vm, flags); cleanup: if (vm) @@ -5552,18 +5345,6 @@ qemuDomainEventDeregisterAny(virConnectPtr conn, /* Migration support. 
*/ -static bool ATTRIBUTE_NONNULL(1) -qemuDomainIsMigratable(virDomainDefPtr def) -{ - if (def->nhostdevs > 0) { - qemuReportError(VIR_ERR_OPERATION_INVALID, - "%s", _("Domain with assigned host devices cannot be migrated")); - return false; - } - - return true; -} - /* Prepare is the first step, and it runs on the destination host. * * This version starts an empty VM listening on a localhost TCP port, and @@ -5578,24 +5359,8 @@ qemudDomainMigratePrepareTunnel(virConnectPtr dconn, const char *dom_xml) { struct qemud_driver *driver = dconn->privateData; - virDomainDefPtr def = NULL; - virDomainObjPtr vm = NULL; - char *migrateFrom; - virDomainEventPtr event = NULL; int ret = -1; - int internalret; - char *unixfile = NULL; - unsigned long long qemuCmdFlags; - qemuDomainObjPrivatePtr priv = NULL; - struct timeval now; - - if (gettimeofday(&now, NULL) < 0) { - virReportSystemError(errno, "%s", - _("cannot get time of day")); - return -1; - } - qemuDriverLock(driver); if (!dom_xml) { qemuReportError(VIR_ERR_INTERNAL_ERROR, "%s", _("no domain XML passed")); @@ -5612,140 +5377,12 @@ qemudDomainMigratePrepareTunnel(virConnectPtr dconn, goto cleanup; } - /* Parse the domain XML. */ - if (!(def = virDomainDefParseString(driver->caps, dom_xml, - VIR_DOMAIN_XML_INACTIVE))) { - qemuReportError(VIR_ERR_OPERATION_FAILED, - "%s", _("failed to parse XML, libvirt version may be " - "different between source and destination host")); - goto cleanup; - } - - if (!qemuDomainIsMigratable(def)) - goto cleanup; - - /* Target domain name, maybe renamed. */ - if (dname) { - VIR_FREE(def->name); - def->name = strdup(dname); - if (def->name == NULL) - goto cleanup; - } - - if (virDomainObjIsDuplicate(&driver->domains, def, 1) < 0) - goto cleanup; - - if (!(vm = virDomainAssignDef(driver->caps, - &driver->domains, - def, true))) { - /* virDomainAssignDef already set the error */ - goto cleanup; - } - def = NULL; - priv = vm->privateData; - - if (qemuDomainObjBeginJobWithDriver(driver, vm) < 0) - goto cleanup; - priv->jobActive = QEMU_JOB_MIGRATION_OUT; - - /* Domain starts inactive, even if the domain XML had an id field. */ - vm->def->id = -1; - - if (virAsprintf(&unixfile, "%s/qemu.tunnelmigrate.dest.%s", - driver->libDir, vm->def->name) < 0) { - virReportOOMError(); - goto endjob; - } - unlink(unixfile); - - /* check that this qemu version supports the interactive exec */ - if (qemuCapsExtractVersionInfo(vm->def->emulator, NULL, &qemuCmdFlags) < 0) { - qemuReportError(VIR_ERR_INTERNAL_ERROR, - _("Cannot determine QEMU argv syntax %s"), - vm->def->emulator); - goto endjob; - } - if (qemuCmdFlags & QEMUD_CMD_FLAG_MIGRATE_QEMU_UNIX) - internalret = virAsprintf(&migrateFrom, "unix:%s", unixfile); - else if (qemuCmdFlags & QEMUD_CMD_FLAG_MIGRATE_QEMU_EXEC) - internalret = virAsprintf(&migrateFrom, "exec:nc -U -l %s", unixfile); - else { - qemuReportError(VIR_ERR_OPERATION_FAILED, - "%s", _("Destination qemu is too old to support tunnelled migration")); - goto endjob; - } - if (internalret < 0) { - virReportOOMError(); - goto endjob; - } - /* Start the QEMU daemon, with the same command-line arguments plus - * -incoming unix:/path/to/file or exec:nc -U /path/to/file - */ - internalret = qemuProcessStart(dconn, driver, vm, migrateFrom, true, - -1, NULL, VIR_VM_OP_MIGRATE_IN_START); - VIR_FREE(migrateFrom); - if (internalret < 0) { - qemuDomainStartAudit(vm, "migrated", false); - /* Note that we don't set an error here because qemuProcessStart - * should have already done that. 
- */ - if (!vm->persistent) { - virDomainRemoveInactive(&driver->domains, vm); - vm = NULL; - } - goto endjob; - } - - if (virFDStreamConnectUNIX(st, - unixfile, - false) < 0) { - qemuDomainStartAudit(vm, "migrated", false); - qemuProcessStop(driver, vm, 0); - if (!vm->persistent) { - if (qemuDomainObjEndJob(vm) > 0) - virDomainRemoveInactive(&driver->domains, vm); - vm = NULL; - } - virReportSystemError(errno, - _("cannot open unix socket '%s' for tunnelled migration"), - unixfile); - goto endjob; - } - - qemuDomainStartAudit(vm, "migrated", true); - - event = virDomainEventNewFromObj(vm, - VIR_DOMAIN_EVENT_STARTED, - VIR_DOMAIN_EVENT_STARTED_MIGRATED); - ret = 0; - -endjob: - if (vm && - qemuDomainObjEndJob(vm) == 0) - vm = NULL; - - /* We set a fake job active which is held across - * API calls until the finish() call. This prevents - * any other APIs being invoked while incoming - * migration is taking place - */ - if (vm && - virDomainObjIsActive(vm)) { - priv->jobActive = QEMU_JOB_MIGRATION_IN; - priv->jobInfo.type = VIR_DOMAIN_JOB_UNBOUNDED; - priv->jobStart = timeval_to_ms(now); - } + qemuDriverLock(driver); + ret = qemuMigrationPrepareTunnel(driver, dconn, st, + dname, dom_xml); + qemuDriverUnlock(driver); cleanup: - virDomainDefFree(def); - if (unixfile) - unlink(unixfile); - VIR_FREE(unixfile); - if (vm) - virDomainObjUnlock(vm); - if (event) - qemuDomainEventQueue(driver, event); - qemuDriverUnlock(driver); return ret; } @@ -5764,25 +5401,8 @@ qemudDomainMigratePrepare2 (virConnectPtr dconn, unsigned long resource ATTRIBUTE_UNUSED, const char *dom_xml) { - static int port = 0; struct qemud_driver *driver = dconn->privateData; - virDomainDefPtr def = NULL; - virDomainObjPtr vm = NULL; - int this_port; - char *hostname = NULL; - char migrateFrom [64]; - const char *p; - virDomainEventPtr event = NULL; int ret = -1; - int internalret; - qemuDomainObjPrivatePtr priv = NULL; - struct timeval now; - - if (gettimeofday(&now, NULL) < 0) { - virReportSystemError(errno, "%s", - _("cannot get time of day")); - return -1; - } virCheckFlags(VIR_MIGRATE_LIVE | VIR_MIGRATE_PEER2PEER | @@ -5811,790 +5431,58 @@ qemudDomainMigratePrepare2 (virConnectPtr dconn, goto cleanup; } - /* The URI passed in may be NULL or a string "tcp://somehostname:port". - * - * If the URI passed in is NULL then we allocate a port number - * from our pool of port numbers and return a URI of - * "tcp://ourhostname:port". - * - * If the URI passed in is not NULL then we try to parse out the - * port number and use that (note that the hostname is assumed - * to be a correct hostname which refers to the target machine). - */ - if (uri_in == NULL) { - this_port = QEMUD_MIGRATION_FIRST_PORT + port++; - if (port == QEMUD_MIGRATION_NUM_PORTS) port = 0; + ret = qemuMigrationPrepareDirect(driver, dconn, + uri_in, uri_out, + dname, dom_xml); - /* Get hostname */ - if ((hostname = virGetHostname(NULL)) == NULL) - goto cleanup; +cleanup: + qemuDriverUnlock(driver); + return ret; +} - if (STRPREFIX(hostname, "localhost")) { - qemuReportError(VIR_ERR_INTERNAL_ERROR, "%s", - _("hostname on destination resolved to localhost, but migration requires an FQDN")); - goto cleanup; - } - /* XXX this really should have been a properly well-formed - * URI, but we can't add in tcp:// now without breaking - * compatability with old targets. We at least make the - * new targets accept both syntaxes though. 
- */ - /* Caller frees */ - internalret = virAsprintf(uri_out, "tcp:%s:%d", hostname, this_port); - if (internalret < 0) { - virReportOOMError(); - goto cleanup; - } - } else { - /* Check the URI starts with "tcp:". We will escape the - * URI when passing it to the qemu monitor, so bad - * characters in hostname part don't matter. - */ - if (!STRPREFIX (uri_in, "tcp:")) { - qemuReportError (VIR_ERR_INVALID_ARG, - "%s", _("only tcp URIs are supported for KVM/QEMU migrations")); - goto cleanup; - } +/* Perform is the second step, and it runs on the source host. */ +static int +qemudDomainMigratePerform (virDomainPtr dom, + const char *cookie ATTRIBUTE_UNUSED, + int cookielen ATTRIBUTE_UNUSED, + const char *uri, + unsigned long flags, + const char *dname, + unsigned long resource) +{ + struct qemud_driver *driver = dom->conn->privateData; + virDomainObjPtr vm; + int ret = -1; - /* Get the port number. */ - p = strrchr (uri_in, ':'); - if (p == strchr(uri_in, ':')) { - /* Generate a port */ - this_port = QEMUD_MIGRATION_FIRST_PORT + port++; - if (port == QEMUD_MIGRATION_NUM_PORTS) - port = 0; + virCheckFlags(VIR_MIGRATE_LIVE | + VIR_MIGRATE_PEER2PEER | + VIR_MIGRATE_TUNNELLED | + VIR_MIGRATE_PERSIST_DEST | + VIR_MIGRATE_UNDEFINE_SOURCE | + VIR_MIGRATE_PAUSED | + VIR_MIGRATE_NON_SHARED_DISK | + VIR_MIGRATE_NON_SHARED_INC, -1); - /* Caller frees */ - if (virAsprintf(uri_out, "%s:%d", uri_in, this_port) < 0) { - virReportOOMError(); - goto cleanup; - } + qemuDriverLock(driver); + vm = virDomainFindByUUID(&driver->domains, dom->uuid); + if (!vm) { + char uuidstr[VIR_UUID_STRING_BUFLEN]; + virUUIDFormat(dom->uuid, uuidstr); + qemuReportError(VIR_ERR_NO_DOMAIN, + _("no domain with matching uuid '%s'"), uuidstr); + goto cleanup; + } - } else { - p++; /* definitely has a ':' in it, see above */ - this_port = virParseNumber (&p); - if (this_port == -1 || p-uri_in != strlen (uri_in)) { - qemuReportError(VIR_ERR_INVALID_ARG, - "%s", _("URI ended with incorrect ':port'")); - goto cleanup; - } - } - } - - if (*uri_out) - VIR_DEBUG("Generated uri_out=%s", *uri_out); - - /* Parse the domain XML. */ - if (!(def = virDomainDefParseString(driver->caps, dom_xml, - VIR_DOMAIN_XML_INACTIVE))) { - qemuReportError(VIR_ERR_OPERATION_FAILED, - "%s", _("failed to parse XML")); - goto cleanup; - } - - if (!qemuDomainIsMigratable(def)) - goto cleanup; - - /* Target domain name, maybe renamed. */ - if (dname) { - VIR_FREE(def->name); - def->name = strdup(dname); - if (def->name == NULL) - goto cleanup; - } - - if (virDomainObjIsDuplicate(&driver->domains, def, 1) < 0) - goto cleanup; - - if (!(vm = virDomainAssignDef(driver->caps, - &driver->domains, - def, true))) { - /* virDomainAssignDef already set the error */ - goto cleanup; - } - def = NULL; - priv = vm->privateData; - - if (qemuDomainObjBeginJobWithDriver(driver, vm) < 0) - goto cleanup; - priv->jobActive = QEMU_JOB_MIGRATION_OUT; - - /* Domain starts inactive, even if the domain XML had an id field. */ - vm->def->id = -1; - - /* Start the QEMU daemon, with the same command-line arguments plus - * -incoming tcp:0.0.0.0:port - */ - snprintf (migrateFrom, sizeof (migrateFrom), "tcp:0.0.0.0:%d", this_port); - if (qemuProcessStart(dconn, driver, vm, migrateFrom, true, - -1, NULL, VIR_VM_OP_MIGRATE_IN_START) < 0) { - qemuDomainStartAudit(vm, "migrated", false); - /* Note that we don't set an error here because qemuProcessStart - * should have already done that. 
- */ - if (!vm->persistent) { - if (qemuDomainObjEndJob(vm) > 0) - virDomainRemoveInactive(&driver->domains, vm); - vm = NULL; - } - goto endjob; - } - - qemuDomainStartAudit(vm, "migrated", true); - event = virDomainEventNewFromObj(vm, - VIR_DOMAIN_EVENT_STARTED, - VIR_DOMAIN_EVENT_STARTED_MIGRATED); - ret = 0; - -endjob: - if (vm && - qemuDomainObjEndJob(vm) == 0) - vm = NULL; - - /* We set a fake job active which is held across - * API calls until the finish() call. This prevents - * any other APIs being invoked while incoming - * migration is taking place - */ - if (vm && - virDomainObjIsActive(vm)) { - priv->jobActive = QEMU_JOB_MIGRATION_IN; - priv->jobInfo.type = VIR_DOMAIN_JOB_UNBOUNDED; - priv->jobStart = timeval_to_ms(now); - } - -cleanup: - VIR_FREE(hostname); - virDomainDefFree(def); - if (ret != 0) - VIR_FREE(*uri_out); - if (vm) - virDomainObjUnlock(vm); - if (event) - qemuDomainEventQueue(driver, event); - qemuDriverUnlock(driver); - return ret; - -} - - -/* Perform migration using QEMU's native TCP migrate support, - * not encrypted obviously - */ -static int doNativeMigrate(struct qemud_driver *driver, - virDomainObjPtr vm, - const char *uri, - unsigned int flags, - const char *dname ATTRIBUTE_UNUSED, - unsigned long resource) -{ - int ret = -1; - xmlURIPtr uribits = NULL; - qemuDomainObjPrivatePtr priv = vm->privateData; - unsigned int background_flags = QEMU_MONITOR_MIGRATE_BACKGROUND; - - /* Issue the migrate command. */ - if (STRPREFIX(uri, "tcp:") && !STRPREFIX(uri, "tcp://")) { - /* HACK: source host generates bogus URIs, so fix them up */ - char *tmpuri; - if (virAsprintf(&tmpuri, "tcp://%s", uri + strlen("tcp:")) < 0) { - virReportOOMError(); - goto cleanup; - } - uribits = xmlParseURI(tmpuri); - VIR_FREE(tmpuri); - } else { - uribits = xmlParseURI(uri); - } - if (!uribits) { - qemuReportError(VIR_ERR_INTERNAL_ERROR, - _("cannot parse URI %s"), uri); - goto cleanup; - } - - qemuDomainObjEnterMonitorWithDriver(driver, vm); - if (resource > 0 && - qemuMonitorSetMigrationSpeed(priv->mon, resource) < 0) { - qemuDomainObjExitMonitorWithDriver(driver, vm); - goto cleanup; - } - - if (flags & VIR_MIGRATE_NON_SHARED_DISK) - background_flags |= QEMU_MONITOR_MIGRATE_NON_SHARED_DISK; - - if (flags & VIR_MIGRATE_NON_SHARED_INC) - background_flags |= QEMU_MONITOR_MIGRATE_NON_SHARED_INC; - - if (qemuMonitorMigrateToHost(priv->mon, background_flags, uribits->server, - uribits->port) < 0) { - qemuDomainObjExitMonitorWithDriver(driver, vm); - goto cleanup; - } - qemuDomainObjExitMonitorWithDriver(driver, vm); - - if (qemuDomainWaitForMigrationComplete(driver, vm) < 0) - goto cleanup; - - ret = 0; - -cleanup: - xmlFreeURI(uribits); - return ret; -} - - -#define TUNNEL_SEND_BUF_SIZE 65536 - -static int doTunnelSendAll(virStreamPtr st, - int sock) -{ - char *buffer; - int nbytes = TUNNEL_SEND_BUF_SIZE; - - if (VIR_ALLOC_N(buffer, TUNNEL_SEND_BUF_SIZE) < 0) { - virReportOOMError(); - virStreamAbort(st); - return -1; - } - - /* XXX should honour the 'resource' parameter here */ - for (;;) { - nbytes = saferead(sock, buffer, nbytes); - if (nbytes < 0) { - virReportSystemError(errno, "%s", - _("tunnelled migration failed to read from qemu")); - virStreamAbort(st); - VIR_FREE(buffer); - return -1; - } - else if (nbytes == 0) - /* EOF; get out of here */ - break; - - if (virStreamSend(st, buffer, nbytes) < 0) { - qemuReportError(VIR_ERR_OPERATION_FAILED, "%s", - _("Failed to write migration data to remote libvirtd")); - VIR_FREE(buffer); - return -1; - } - } - - VIR_FREE(buffer); - - if 
(virStreamFinish(st) < 0) - /* virStreamFinish set the error for us */ - return -1; - - return 0; -} - -static int doTunnelMigrate(virDomainPtr dom, - struct qemud_driver *driver, - virConnectPtr dconn, - virDomainObjPtr vm, - const char *dom_xml, - const char *uri, - unsigned long flags, - const char *dname, - unsigned long resource) -{ - qemuDomainObjPrivatePtr priv = vm->privateData; - int client_sock = -1; - int qemu_sock = -1; - struct sockaddr_un sa_qemu, sa_client; - socklen_t addrlen; - virDomainPtr ddomain = NULL; - int retval = -1; - virStreamPtr st = NULL; - char *unixfile = NULL; - int internalret; - unsigned long long qemuCmdFlags; - int status; - unsigned long long transferred, remaining, total; - unsigned int background_flags = QEMU_MONITOR_MIGRATE_BACKGROUND; - - /* - * The order of operations is important here to avoid touching - * the source VM until we are very sure we can successfully - * start the migration operation. - * - * 1. setup local support infrastructure (eg sockets) - * 2. setup destination fully - * 3. start migration on source - */ - - - /* Stage 1. setup local support infrastructure */ - - if (virAsprintf(&unixfile, "%s/qemu.tunnelmigrate.src.%s", - driver->libDir, vm->def->name) < 0) { - virReportOOMError(); - goto cleanup; - } - - qemu_sock = socket(AF_UNIX, SOCK_STREAM, 0); - if (qemu_sock < 0) { - virReportSystemError(errno, "%s", - _("cannot open tunnelled migration socket")); - goto cleanup; - } - memset(&sa_qemu, 0, sizeof(sa_qemu)); - sa_qemu.sun_family = AF_UNIX; - if (virStrcpy(sa_qemu.sun_path, unixfile, - sizeof(sa_qemu.sun_path)) == NULL) { - qemuReportError(VIR_ERR_INTERNAL_ERROR, - _("Unix socket '%s' too big for destination"), - unixfile); - goto cleanup; - } - unlink(unixfile); - if (bind(qemu_sock, (struct sockaddr *)&sa_qemu, sizeof(sa_qemu)) < 0) { - virReportSystemError(errno, - _("Cannot bind to unix socket '%s' for tunnelled migration"), - unixfile); - goto cleanup; - } - if (listen(qemu_sock, 1) < 0) { - virReportSystemError(errno, - _("Cannot listen on unix socket '%s' for tunnelled migration"), - unixfile); - goto cleanup; - } - - if (chown(unixfile, qemu_driver->user, qemu_driver->group) < 0) { - virReportSystemError(errno, - _("Cannot change unix socket '%s' owner"), - unixfile); - goto cleanup; - } - - /* check that this qemu version supports the unix migration */ - if (qemuCapsExtractVersionInfo(vm->def->emulator, NULL, &qemuCmdFlags) < 0) { - qemuReportError(VIR_ERR_INTERNAL_ERROR, - _("Cannot extract Qemu version from '%s'"), - vm->def->emulator); - goto cleanup; - } - - if (!(qemuCmdFlags & QEMUD_CMD_FLAG_MIGRATE_QEMU_UNIX) && - !(qemuCmdFlags & QEMUD_CMD_FLAG_MIGRATE_QEMU_EXEC)) { - qemuReportError(VIR_ERR_OPERATION_FAILED, - "%s", _("Source qemu is too old to support tunnelled migration")); - goto cleanup; - } - - - /* Stage 2. 
setup destination fully - * - * Once stage 2 has completed successfully, we *must* call finish - * to cleanup the target whether we succeed or fail - */ - st = virStreamNew(dconn, 0); - if (st == NULL) - /* virStreamNew only fails on OOM, and it reports the error itself */ - goto cleanup; - - qemuDomainObjEnterRemoteWithDriver(driver, vm); - internalret = dconn->driver->domainMigratePrepareTunnel(dconn, st, - flags, dname, - resource, dom_xml); - qemuDomainObjExitRemoteWithDriver(driver, vm); - - if (internalret < 0) - /* domainMigratePrepareTunnel sets the error for us */ - goto cleanup; - - /* the domain may have shutdown or crashed while we had the locks dropped - * in qemuDomainObjEnterRemoteWithDriver, so check again - */ - if (!virDomainObjIsActive(vm)) { - qemuReportError(VIR_ERR_INTERNAL_ERROR, "%s", - _("guest unexpectedly quit")); - goto cleanup; - } - - /* 3. start migration on source */ - qemuDomainObjEnterMonitorWithDriver(driver, vm); - if (flags & VIR_MIGRATE_NON_SHARED_DISK) - background_flags |= QEMU_MONITOR_MIGRATE_NON_SHARED_DISK; - if (flags & VIR_MIGRATE_NON_SHARED_INC) - background_flags |= QEMU_MONITOR_MIGRATE_NON_SHARED_INC; - if (qemuCmdFlags & QEMUD_CMD_FLAG_MIGRATE_QEMU_UNIX){ - internalret = qemuMonitorMigrateToUnix(priv->mon, background_flags, - unixfile); - } - else if (qemuCmdFlags & QEMUD_CMD_FLAG_MIGRATE_QEMU_EXEC) { - const char *args[] = { "nc", "-U", unixfile, NULL }; - internalret = qemuMonitorMigrateToCommand(priv->mon, QEMU_MONITOR_MIGRATE_BACKGROUND, args); - } else { - internalret = -1; - } - qemuDomainObjExitMonitorWithDriver(driver, vm); - if (internalret < 0) { - qemuReportError(VIR_ERR_OPERATION_FAILED, - "%s", _("tunnelled migration monitor command failed")); - goto finish; - } - - if (!virDomainObjIsActive(vm)) { - qemuReportError(VIR_ERR_INTERNAL_ERROR, "%s", - _("guest unexpectedly quit")); - goto cleanup; - } - - /* From this point onwards we *must* call cancel to abort the - * migration on source if anything goes wrong */ - - /* it is also possible that the migrate didn't fail initially, but - * rather failed later on. Check the output of "info migrate" - */ - qemuDomainObjEnterMonitorWithDriver(driver, vm); - if (qemuMonitorGetMigrationStatus(priv->mon, - &status, - &transferred, - &remaining, - &total) < 0) { - qemuDomainObjExitMonitorWithDriver(driver, vm); - goto cancel; - } - qemuDomainObjExitMonitorWithDriver(driver, vm); - - if (status == QEMU_MONITOR_MIGRATION_STATUS_ERROR) { - qemuReportError(VIR_ERR_OPERATION_FAILED, - "%s",_("migrate failed")); - goto cancel; - } - - addrlen = sizeof(sa_client); - while ((client_sock = accept(qemu_sock, (struct sockaddr *)&sa_client, &addrlen)) < 0) { - if (errno == EAGAIN || errno == EINTR) - continue; - virReportSystemError(errno, "%s", - _("tunnelled migration failed to accept from qemu")); - goto cancel; - } - - retval = doTunnelSendAll(st, client_sock); - -cancel: - if (retval != 0 && virDomainObjIsActive(vm)) { - qemuDomainObjEnterMonitorWithDriver(driver, vm); - qemuMonitorMigrateCancel(priv->mon); - qemuDomainObjExitMonitorWithDriver(driver, vm); - } - -finish: - dname = dname ? 
dname : dom->name; - qemuDomainObjEnterRemoteWithDriver(driver, vm); - ddomain = dconn->driver->domainMigrateFinish2 - (dconn, dname, NULL, 0, uri, flags, retval); - qemuDomainObjExitRemoteWithDriver(driver, vm); - -cleanup: - VIR_FORCE_CLOSE(client_sock); - VIR_FORCE_CLOSE(qemu_sock); - - if (ddomain) - virUnrefDomain(ddomain); - - if (unixfile) { - unlink(unixfile); - VIR_FREE(unixfile); - } - - if (st) - /* don't call virStreamFree(), because that resets any pending errors */ - virUnrefStream(st); - return retval; -} - - -/* This is essentially a simplified re-impl of - * virDomainMigrateVersion2 from libvirt.c, but running in source - * libvirtd context, instead of client app context */ -static int doNonTunnelMigrate(virDomainPtr dom, - struct qemud_driver *driver, - virConnectPtr dconn, - virDomainObjPtr vm, - const char *dom_xml, - const char *uri ATTRIBUTE_UNUSED, - unsigned long flags, - const char *dname, - unsigned long resource) -{ - virDomainPtr ddomain = NULL; - int retval = -1; - char *uri_out = NULL; - int rc; - - qemuDomainObjEnterRemoteWithDriver(driver, vm); - /* NB we don't pass 'uri' into this, since that's the libvirtd - * URI in this context - so we let dest pick it */ - rc = dconn->driver->domainMigratePrepare2(dconn, - NULL, /* cookie */ - 0, /* cookielen */ - NULL, /* uri */ - &uri_out, - flags, dname, - resource, dom_xml); - qemuDomainObjExitRemoteWithDriver(driver, vm); - if (rc < 0) - /* domainMigratePrepare2 sets the error for us */ - goto cleanup; - - /* the domain may have shutdown or crashed while we had the locks dropped - * in qemuDomainObjEnterRemoteWithDriver, so check again - */ - if (!virDomainObjIsActive(vm)) { - qemuReportError(VIR_ERR_INTERNAL_ERROR, "%s", - _("guest unexpectedly quit")); - goto cleanup; - } - - if (uri_out == NULL) { - qemuReportError(VIR_ERR_INTERNAL_ERROR, "%s", - _("domainMigratePrepare2 did not set uri")); - goto cleanup; - } - - if (doNativeMigrate(driver, vm, uri_out, flags, dname, resource) < 0) - goto finish; - - retval = 0; - -finish: - dname = dname ? 
dname : dom->name; - qemuDomainObjEnterRemoteWithDriver(driver, vm); - ddomain = dconn->driver->domainMigrateFinish2 - (dconn, dname, NULL, 0, uri_out, flags, retval); - qemuDomainObjExitRemoteWithDriver(driver, vm); - - if (ddomain) - virUnrefDomain(ddomain); - -cleanup: - return retval; -} - - -static int doPeer2PeerMigrate(virDomainPtr dom, - struct qemud_driver *driver, - virDomainObjPtr vm, - const char *uri, - unsigned long flags, - const char *dname, - unsigned long resource) -{ - int ret = -1; - virConnectPtr dconn = NULL; - char *dom_xml; - bool p2p; - - /* the order of operations is important here; we make sure the - * destination side is completely setup before we touch the source - */ - - qemuDomainObjEnterRemoteWithDriver(driver, vm); - dconn = virConnectOpen(uri); - qemuDomainObjExitRemoteWithDriver(driver, vm); - if (dconn == NULL) { - qemuReportError(VIR_ERR_OPERATION_FAILED, - _("Failed to connect to remote libvirt URI %s"), uri); - return -1; - } - - qemuDomainObjEnterRemoteWithDriver(driver, vm); - p2p = VIR_DRV_SUPPORTS_FEATURE(dconn->driver, dconn, - VIR_DRV_FEATURE_MIGRATION_P2P); - qemuDomainObjExitRemoteWithDriver(driver, vm); - if (!p2p) { - qemuReportError(VIR_ERR_OPERATION_FAILED, "%s", - _("Destination libvirt does not support peer-to-peer migration protocol")); - goto cleanup; - } - - /* domain may have been stopped while we were talking to remote daemon */ - if (!virDomainObjIsActive(vm)) { - qemuReportError(VIR_ERR_INTERNAL_ERROR, "%s", - _("guest unexpectedly quit")); - goto cleanup; - } - - dom_xml = qemudVMDumpXML(driver, vm, - VIR_DOMAIN_XML_SECURE | - VIR_DOMAIN_XML_UPDATE_CPU); - if (!dom_xml) { - qemuReportError(VIR_ERR_OPERATION_FAILED, - "%s", _("failed to get domain xml")); - goto cleanup; - } - - if (flags & VIR_MIGRATE_TUNNELLED) - ret = doTunnelMigrate(dom, driver, dconn, vm, dom_xml, uri, flags, dname, resource); - else - ret = doNonTunnelMigrate(dom, driver, dconn, vm, dom_xml, uri, flags, dname, resource); - -cleanup: - VIR_FREE(dom_xml); - /* don't call virConnectClose(), because that resets any pending errors */ - qemuDomainObjEnterRemoteWithDriver(driver, vm); - virUnrefConnect(dconn); - qemuDomainObjExitRemoteWithDriver(driver, vm); - - return ret; -} - - -/* Perform is the second step, and it runs on the source host. 
*/ -static int -qemudDomainMigratePerform (virDomainPtr dom, - const char *cookie ATTRIBUTE_UNUSED, - int cookielen ATTRIBUTE_UNUSED, - const char *uri, - unsigned long flags, - const char *dname, - unsigned long resource) -{ - struct qemud_driver *driver = dom->conn->privateData; - virDomainObjPtr vm; - virDomainEventPtr event = NULL; - int ret = -1; - int resume = 0; - qemuDomainObjPrivatePtr priv; - - virCheckFlags(VIR_MIGRATE_LIVE | - VIR_MIGRATE_PEER2PEER | - VIR_MIGRATE_TUNNELLED | - VIR_MIGRATE_PERSIST_DEST | - VIR_MIGRATE_UNDEFINE_SOURCE | - VIR_MIGRATE_PAUSED | - VIR_MIGRATE_NON_SHARED_DISK | - VIR_MIGRATE_NON_SHARED_INC, -1); - - qemuDriverLock(driver); - vm = virDomainFindByUUID(&driver->domains, dom->uuid); - if (!vm) { - char uuidstr[VIR_UUID_STRING_BUFLEN]; - virUUIDFormat(dom->uuid, uuidstr); - qemuReportError(VIR_ERR_NO_DOMAIN, - _("no domain with matching uuid '%s'"), uuidstr); - goto cleanup; - } - priv = vm->privateData; - - if (qemuDomainObjBeginJobWithDriver(driver, vm) < 0) - goto cleanup; - priv->jobActive = QEMU_JOB_MIGRATION_OUT; - - if (!virDomainObjIsActive(vm)) { - qemuReportError(VIR_ERR_OPERATION_INVALID, - "%s", _("domain is not running")); - goto endjob; - } - - memset(&priv->jobInfo, 0, sizeof(priv->jobInfo)); - priv->jobInfo.type = VIR_DOMAIN_JOB_UNBOUNDED; - - resume = vm->state == VIR_DOMAIN_RUNNING; - if (!(flags & VIR_MIGRATE_LIVE) && vm->state == VIR_DOMAIN_RUNNING) { - if (qemuDomainMigrateOffline(driver, vm) < 0) - goto endjob; - } - - if ((flags & (VIR_MIGRATE_TUNNELLED | VIR_MIGRATE_PEER2PEER))) { - if (doPeer2PeerMigrate(dom, driver, vm, uri, flags, dname, resource) < 0) - /* doPeer2PeerMigrate already set the error, so just get out */ - goto endjob; - } else { - if (doNativeMigrate(driver, vm, uri, flags, dname, resource) < 0) - goto endjob; - } - - /* Clean up the source domain. */ - qemuProcessStop(driver, vm, 1); - qemuDomainStopAudit(vm, "migrated"); - resume = 0; - - event = virDomainEventNewFromObj(vm, - VIR_DOMAIN_EVENT_STOPPED, - VIR_DOMAIN_EVENT_STOPPED_MIGRATED); - if (!vm->persistent || (flags & VIR_MIGRATE_UNDEFINE_SOURCE)) { - virDomainDeleteConfig(driver->configDir, driver->autostartDir, vm); - if (qemuDomainObjEndJob(vm) > 0) - virDomainRemoveInactive(&driver->domains, vm); - vm = NULL; - } - ret = 0; - -endjob: - if (resume && vm->state == VIR_DOMAIN_PAUSED) { - /* we got here through some sort of failure; start the domain again */ - if (qemuProcessStartCPUs(driver, vm, dom->conn) < 0) { - /* Hm, we already know we are in error here. 
We don't want to - * overwrite the previous error, though, so we just throw something - * to the logs and hope for the best - */ - VIR_ERROR(_("Failed to resume guest %s after failure"), - vm->def->name); - } - - event = virDomainEventNewFromObj(vm, - VIR_DOMAIN_EVENT_RESUMED, - VIR_DOMAIN_EVENT_RESUMED_MIGRATED); - } - if (vm && - qemuDomainObjEndJob(vm) == 0) - vm = NULL; + ret = qemuMigrationPerform(driver, dom->conn, vm, + uri, flags, + dname, resource); cleanup: - if (vm) - virDomainObjUnlock(vm); - if (event) - qemuDomainEventQueue(driver, event); qemuDriverUnlock(driver); return ret; } -#if WITH_MACVTAP -static void -qemudVPAssociatePortProfiles(virDomainDefPtr def) { - int i; - int last_good_net = -1; - virDomainNetDefPtr net; - - for (i = 0; i < def->nnets; i++) { - net = def->nets[i]; - if (net->type == VIR_DOMAIN_NET_TYPE_DIRECT) { - if (vpAssociatePortProfileId(net->ifname, - net->mac, - net->data.direct.linkdev, - &net->data.direct.virtPortProfile, - def->uuid, - VIR_VM_OP_MIGRATE_IN_FINISH) != 0) - goto err_exit; - } - last_good_net = i; - } - - return; - -err_exit: - for (i = 0; i < last_good_net; i++) { - net = def->nets[i]; - if (net->type == VIR_DOMAIN_NET_TYPE_DIRECT) { - vpDisassociatePortProfileId(net->ifname, - net->mac, - net->data.direct.linkdev, - &net->data.direct.virtPortProfile, - VIR_VM_OP_MIGRATE_IN_FINISH); - } - } -} -#else /* !WITH_MACVTAP */ -static void -qemudVPAssociatePortProfiles(virDomainDefPtr def ATTRIBUTE_UNUSED) { } -#endif /* WITH_MACVTAP */ /* Finish is the third and final step, and it runs on the destination host. */ static virDomainPtr @@ -6609,10 +5497,7 @@ qemudDomainMigrateFinish2 (virConnectPtr dconn, struct qemud_driver *driver = dconn->privateData; virDomainObjPtr vm; virDomainPtr dom = NULL; - virDomainEventPtr event = NULL; virErrorPtr orig_err; - int newVM = 1; - qemuDomainObjPrivatePtr priv = NULL; virCheckFlags(VIR_MIGRATE_LIVE | VIR_MIGRATE_PEER2PEER | @@ -6634,118 +5519,18 @@ qemudDomainMigrateFinish2 (virConnectPtr dconn, goto cleanup; } - priv = vm->privateData; - if (priv->jobActive != QEMU_JOB_MIGRATION_IN) { - qemuReportError(VIR_ERR_NO_DOMAIN, - _("domain '%s' is not processing incoming migration"), dname); - goto cleanup; - } - priv->jobActive = QEMU_JOB_NONE; - memset(&priv->jobInfo, 0, sizeof(priv->jobInfo)); - - if (qemuDomainObjBeginJobWithDriver(driver, vm) < 0) - goto cleanup; - - /* Did the migration go as planned? If yes, return the domain - * object, but if no, clean up the empty qemu process. - */ - if (retcode == 0) { - if (!virDomainObjIsActive(vm)) { - qemuReportError(VIR_ERR_INTERNAL_ERROR, "%s", - _("guest unexpectedly quit")); - goto cleanup; - } - - qemudVPAssociatePortProfiles(vm->def); - - if (flags & VIR_MIGRATE_PERSIST_DEST) { - if (vm->persistent) - newVM = 0; - vm->persistent = 1; - - if (virDomainSaveConfig(driver->configDir, vm->def) < 0) { - /* Hmpf. Migration was successful, but making it persistent - * was not. If we report successful, then when this domain - * shuts down, management tools are in for a surprise. On the - * other hand, if we report failure, then the management tools - * might try to restart the domain on the source side, even - * though the domain is actually running on the destination. - * Return a NULL dom pointer, and hope that this is a rare - * situation and management tools are smart. - */ - vm = NULL; - goto endjob; - } - - event = virDomainEventNewFromObj(vm, - VIR_DOMAIN_EVENT_DEFINED, - newVM ? 
- VIR_DOMAIN_EVENT_DEFINED_ADDED : - VIR_DOMAIN_EVENT_DEFINED_UPDATED); - if (event) - qemuDomainEventQueue(driver, event); - event = NULL; - - } - dom = virGetDomain (dconn, vm->def->name, vm->def->uuid); - - if (!(flags & VIR_MIGRATE_PAUSED)) { - /* run 'cont' on the destination, which allows migration on qemu - * >= 0.10.6 to work properly. This isn't strictly necessary on - * older qemu's, but it also doesn't hurt anything there - */ - if (qemuProcessStartCPUs(driver, vm, dconn) < 0) { - if (virGetLastError() == NULL) - qemuReportError(VIR_ERR_INTERNAL_ERROR, - "%s", _("resume operation failed")); - goto endjob; - } - } - - event = virDomainEventNewFromObj(vm, - VIR_DOMAIN_EVENT_RESUMED, - VIR_DOMAIN_EVENT_RESUMED_MIGRATED); - if (vm->state == VIR_DOMAIN_PAUSED) { - qemuDomainEventQueue(driver, event); - event = virDomainEventNewFromObj(vm, - VIR_DOMAIN_EVENT_SUSPENDED, - VIR_DOMAIN_EVENT_SUSPENDED_PAUSED); - } - if (virDomainSaveStatus(driver->caps, driver->stateDir, vm) < 0) { - VIR_WARN("Failed to save status on vm %s", vm->def->name); - goto endjob; - } - } else { - qemuProcessStop(driver, vm, 1); - qemuDomainStopAudit(vm, "failed"); - event = virDomainEventNewFromObj(vm, - VIR_DOMAIN_EVENT_STOPPED, - VIR_DOMAIN_EVENT_STOPPED_FAILED); - if (!vm->persistent) { - if (qemuDomainObjEndJob(vm) > 0) - virDomainRemoveInactive(&driver->domains, vm); - vm = NULL; - } - } - -endjob: - if (vm && - qemuDomainObjEndJob(vm) == 0) - vm = NULL; + dom = qemuMigrationFinish(driver, dconn, vm, flags, retcode); cleanup: if (orig_err) { virSetError(orig_err); virFreeError(orig_err); } - if (vm) - virDomainObjUnlock(vm); - if (event) - qemuDomainEventQueue(driver, event); qemuDriverUnlock(driver); return dom; } + static int qemudNodeDeviceGetPciInfo (virNodeDevicePtr dev, unsigned *domain, diff --git a/src/qemu/qemu_migration.c b/src/qemu/qemu_migration.c new file mode 100644 index 0000000..8d23cc5 --- /dev/null +++ b/src/qemu/qemu_migration.c @@ -0,0 +1,1295 @@ +/* + * qemu_migration.c: QEMU migration handling + * + * Copyright (C) 2006-2011 Red Hat, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#include <config.h> + +#include <sys/time.h> + +#include "qemu_migration.h" +#include "qemu_monitor.h" +#include "qemu_domain.h" +#include "qemu_process.h" +#include "qemu_capabilities.h" +#include "qemu_audit.h" + +#include "logging.h" +#include "virterror_internal.h" +#include "memory.h" +#include "util.h" +#include "files.h" +#include "datatypes.h" +#include "fdstream.h" + +#define VIR_FROM_THIS VIR_FROM_QEMU + +#define timeval_to_ms(tv) (((tv).tv_sec * 1000ull) + ((tv).tv_usec / 1000)) + + +bool +qemuMigrationIsAllowed(virDomainDefPtr def) +{ + if (def->nhostdevs > 0) { + qemuReportError(VIR_ERR_OPERATION_INVALID, + "%s", _("Domain with assigned host devices cannot be migrated")); + return false; + } + + return true; +} + +/** qemuMigrationSetOffline + * Pause domain for non-live migration. + */ +int +qemuMigrationSetOffline(struct qemud_driver *driver, + virDomainObjPtr vm) +{ + int ret; + + ret = qemuProcessStopCPUs(driver, vm); + if (ret == 0) { + virDomainEventPtr event; + + event = virDomainEventNewFromObj(vm, + VIR_DOMAIN_EVENT_SUSPENDED, + VIR_DOMAIN_EVENT_SUSPENDED_MIGRATED); + if (event) + qemuDomainEventQueue(driver, event); + } + + return ret; +} + + +int +qemuMigrationWaitForCompletion(struct qemud_driver *driver, virDomainObjPtr vm) +{ + int ret = -1; + int status; + unsigned long long memProcessed; + unsigned long long memRemaining; + unsigned long long memTotal; + qemuDomainObjPrivatePtr priv = vm->privateData; + + priv->jobInfo.type = VIR_DOMAIN_JOB_UNBOUNDED; + + while (priv->jobInfo.type == VIR_DOMAIN_JOB_UNBOUNDED) { + /* Poll every 50ms for progress & to allow cancellation */ + struct timespec ts = { .tv_sec = 0, .tv_nsec = 50 * 1000 * 1000ull }; + struct timeval now; + int rc; + const char *job; + + switch (priv->jobActive) { + case QEMU_JOB_MIGRATION_OUT: + job = _("migration job"); + break; + case QEMU_JOB_SAVE: + job = _("domain save job"); + break; + case QEMU_JOB_DUMP: + job = _("domain core dump job"); + break; + default: + job = _("job"); + } + + + if (!virDomainObjIsActive(vm)) { + qemuReportError(VIR_ERR_INTERNAL_ERROR, _("%s: %s"), + job, _("guest unexpectedly quit")); + goto cleanup; + } + + if (priv->jobSignals & QEMU_JOB_SIGNAL_CANCEL) { + priv->jobSignals ^= QEMU_JOB_SIGNAL_CANCEL; + VIR_DEBUG0("Cancelling job at client request"); + qemuDomainObjEnterMonitorWithDriver(driver, vm); + rc = qemuMonitorMigrateCancel(priv->mon); + qemuDomainObjExitMonitorWithDriver(driver, vm); + if (rc < 0) { + VIR_WARN0("Unable to cancel job"); + } + } else if (priv->jobSignals & QEMU_JOB_SIGNAL_SUSPEND) { + priv->jobSignals ^= QEMU_JOB_SIGNAL_SUSPEND; + VIR_DEBUG0("Pausing domain for non-live migration"); + if (qemuMigrationSetOffline(driver, vm) < 0) + VIR_WARN0("Unable to pause domain"); + } else if (priv->jobSignals & QEMU_JOB_SIGNAL_MIGRATE_DOWNTIME) { + unsigned long long ms = priv->jobSignalsData.migrateDowntime; + + priv->jobSignals ^= QEMU_JOB_SIGNAL_MIGRATE_DOWNTIME; + priv->jobSignalsData.migrateDowntime = 0; + VIR_DEBUG("Setting migration downtime to %llums", ms); + qemuDomainObjEnterMonitorWithDriver(driver, vm); + rc = qemuMonitorSetMigrationDowntime(priv->mon, ms); + qemuDomainObjExitMonitorWithDriver(driver, vm); + if (rc < 0) + VIR_WARN0("Unable to set migration downtime"); + } + + /* Repeat check because the job signals 
might have caused + * guest to die + */ + if (!virDomainObjIsActive(vm)) { + qemuReportError(VIR_ERR_INTERNAL_ERROR, _("%s: %s"), + job, _("guest unexpectedly quit")); + goto cleanup; + } + + qemuDomainObjEnterMonitorWithDriver(driver, vm); + rc = qemuMonitorGetMigrationStatus(priv->mon, + &status, + &memProcessed, + &memRemaining, + &memTotal); + qemuDomainObjExitMonitorWithDriver(driver, vm); + + if (rc < 0) { + priv->jobInfo.type = VIR_DOMAIN_JOB_FAILED; + goto cleanup; + } + + if (gettimeofday(&now, NULL) < 0) { + priv->jobInfo.type = VIR_DOMAIN_JOB_FAILED; + virReportSystemError(errno, "%s", + _("cannot get time of day")); + goto cleanup; + } + priv->jobInfo.timeElapsed = timeval_to_ms(now) - priv->jobStart; + + switch (status) { + case QEMU_MONITOR_MIGRATION_STATUS_INACTIVE: + priv->jobInfo.type = VIR_DOMAIN_JOB_NONE; + qemuReportError(VIR_ERR_OPERATION_FAILED, + _("%s: %s"), job, _("is not active")); + break; + + case QEMU_MONITOR_MIGRATION_STATUS_ACTIVE: + priv->jobInfo.dataTotal = memTotal; + priv->jobInfo.dataRemaining = memRemaining; + priv->jobInfo.dataProcessed = memProcessed; + + priv->jobInfo.memTotal = memTotal; + priv->jobInfo.memRemaining = memRemaining; + priv->jobInfo.memProcessed = memProcessed; + break; + + case QEMU_MONITOR_MIGRATION_STATUS_COMPLETED: + priv->jobInfo.type = VIR_DOMAIN_JOB_COMPLETED; + ret = 0; + break; + + case QEMU_MONITOR_MIGRATION_STATUS_ERROR: + priv->jobInfo.type = VIR_DOMAIN_JOB_FAILED; + qemuReportError(VIR_ERR_OPERATION_FAILED, + _("%s: %s"), job, _("unexpectedly failed")); + break; + + case QEMU_MONITOR_MIGRATION_STATUS_CANCELLED: + priv->jobInfo.type = VIR_DOMAIN_JOB_CANCELLED; + qemuReportError(VIR_ERR_OPERATION_FAILED, + _("%s: %s"), job, _("canceled by client")); + break; + } + + virDomainObjUnlock(vm); + qemuDriverUnlock(driver); + + nanosleep(&ts, NULL); + + qemuDriverLock(driver); + virDomainObjLock(vm); + } + +cleanup: + return ret; +} + + +/* Prepare is the first step, and it runs on the destination host. + * + * This version starts an empty VM listening on a localhost TCP port, and + * sets up the corresponding virStream to handle the incoming data. + */ +int +qemuMigrationPrepareTunnel(struct qemud_driver *driver, + virConnectPtr dconn, + virStreamPtr st, + const char *dname, + const char *dom_xml) +{ + virDomainDefPtr def = NULL; + virDomainObjPtr vm = NULL; + char *migrateFrom; + virDomainEventPtr event = NULL; + int ret = -1; + int internalret; + char *unixfile = NULL; + unsigned long long qemuCmdFlags; + qemuDomainObjPrivatePtr priv = NULL; + struct timeval now; + + if (gettimeofday(&now, NULL) < 0) { + virReportSystemError(errno, "%s", + _("cannot get time of day")); + return -1; + } + + /* Parse the domain XML. */ + if (!(def = virDomainDefParseString(driver->caps, dom_xml, + VIR_DOMAIN_XML_INACTIVE))) { + qemuReportError(VIR_ERR_OPERATION_FAILED, + "%s", _("failed to parse XML")); + goto cleanup; + } + + if (!qemuMigrationIsAllowed(def)) + goto cleanup; + + /* Target domain name, maybe renamed. 
*/ + if (dname) { + VIR_FREE(def->name); + def->name = strdup(dname); + if (def->name == NULL) + goto cleanup; + } + + if (virDomainObjIsDuplicate(&driver->domains, def, 1) < 0) + goto cleanup; + + if (!(vm = virDomainAssignDef(driver->caps, + &driver->domains, + def, true))) { + /* virDomainAssignDef already set the error */ + goto cleanup; + } + def = NULL; + priv = vm->privateData; + + if (qemuDomainObjBeginJobWithDriver(driver, vm) < 0) + goto cleanup; + priv->jobActive = QEMU_JOB_MIGRATION_OUT; + + /* Domain starts inactive, even if the domain XML had an id field. */ + vm->def->id = -1; + + if (virAsprintf(&unixfile, "%s/qemu.tunnelmigrate.dest.%s", + driver->libDir, vm->def->name) < 0) { + virReportOOMError(); + goto endjob; + } + unlink(unixfile); + + /* check that this qemu version supports the interactive exec */ + if (qemuCapsExtractVersionInfo(vm->def->emulator, NULL, &qemuCmdFlags) < 0) { + qemuReportError(VIR_ERR_INTERNAL_ERROR, + _("Cannot determine QEMU argv syntax %s"), + vm->def->emulator); + goto endjob; + } + if (qemuCmdFlags & QEMUD_CMD_FLAG_MIGRATE_QEMU_UNIX) + internalret = virAsprintf(&migrateFrom, "unix:%s", unixfile); + else if (qemuCmdFlags & QEMUD_CMD_FLAG_MIGRATE_QEMU_EXEC) + internalret = virAsprintf(&migrateFrom, "exec:nc -U -l %s", unixfile); + else { + qemuReportError(VIR_ERR_OPERATION_FAILED, + "%s", _("Destination qemu is too old to support tunnelled migration")); + goto endjob; + } + if (internalret < 0) { + virReportOOMError(); + goto endjob; + } + /* Start the QEMU daemon, with the same command-line arguments plus + * -incoming unix:/path/to/file or exec:nc -U /path/to/file + */ + internalret = qemuProcessStart(dconn, driver, vm, migrateFrom, true, + -1, NULL, VIR_VM_OP_MIGRATE_IN_START); + VIR_FREE(migrateFrom); + if (internalret < 0) { + qemuDomainStartAudit(vm, "migrated", false); + /* Note that we don't set an error here because qemuProcessStart + * should have already done that. + */ + if (!vm->persistent) { + virDomainRemoveInactive(&driver->domains, vm); + vm = NULL; + } + goto endjob; + } + + if (virFDStreamConnectUNIX(st, + unixfile, + false) < 0) { + qemuDomainStartAudit(vm, "migrated", false); + qemuProcessStop(driver, vm, 0); + if (!vm->persistent) { + if (qemuDomainObjEndJob(vm) > 0) + virDomainRemoveInactive(&driver->domains, vm); + vm = NULL; + } + virReportSystemError(errno, + _("cannot open unix socket '%s' for tunnelled migration"), + unixfile); + goto endjob; + } + + qemuDomainStartAudit(vm, "migrated", true); + + event = virDomainEventNewFromObj(vm, + VIR_DOMAIN_EVENT_STARTED, + VIR_DOMAIN_EVENT_STARTED_MIGRATED); + ret = 0; + +endjob: + if (vm && + qemuDomainObjEndJob(vm) == 0) + vm = NULL; + + /* We set a fake job active which is held across + * API calls until the finish() call. 
This prevents + * any other APIs being invoked while incoming + * migration is taking place + */ + if (vm && + virDomainObjIsActive(vm)) { + priv->jobActive = QEMU_JOB_MIGRATION_IN; + priv->jobInfo.type = VIR_DOMAIN_JOB_UNBOUNDED; + priv->jobStart = timeval_to_ms(now); + } + +cleanup: + virDomainDefFree(def); + if (unixfile) + unlink(unixfile); + VIR_FREE(unixfile); + if (vm) + virDomainObjUnlock(vm); + if (event) + qemuDomainEventQueue(driver, event); + qemuDriverUnlock(driver); + return ret; +} + + +int +qemuMigrationPrepareDirect(struct qemud_driver *driver, + virConnectPtr dconn, + const char *uri_in, + char **uri_out, + const char *dname, + const char *dom_xml) +{ + static int port = 0; + virDomainDefPtr def = NULL; + virDomainObjPtr vm = NULL; + int this_port; + char *hostname = NULL; + char migrateFrom [64]; + const char *p; + virDomainEventPtr event = NULL; + int ret = -1; + int internalret; + qemuDomainObjPrivatePtr priv = NULL; + struct timeval now; + + if (gettimeofday(&now, NULL) < 0) { + virReportSystemError(errno, "%s", + _("cannot get time of day")); + return -1; + } + + /* The URI passed in may be NULL or a string "tcp://somehostname:port". + * + * If the URI passed in is NULL then we allocate a port number + * from our pool of port numbers and return a URI of + * "tcp://ourhostname:port". + * + * If the URI passed in is not NULL then we try to parse out the + * port number and use that (note that the hostname is assumed + * to be a correct hostname which refers to the target machine). + */ + if (uri_in == NULL) { + this_port = QEMUD_MIGRATION_FIRST_PORT + port++; + if (port == QEMUD_MIGRATION_NUM_PORTS) port = 0; + + /* Get hostname */ + if ((hostname = virGetHostname(NULL)) == NULL) + goto cleanup; + + if (STRPREFIX(hostname, "localhost")) { + qemuReportError(VIR_ERR_INTERNAL_ERROR, "%s", + _("hostname on destination resolved to localhost, but migration requires an FQDN")); + goto cleanup; + } + + /* XXX this really should have been a properly well-formed + * URI, but we can't add in tcp:// now without breaking + * compatibility with old targets. We at least make the + * new targets accept both syntaxes though. + */ + /* Caller frees */ + internalret = virAsprintf(uri_out, "tcp:%s:%d", hostname, this_port); + if (internalret < 0) { + virReportOOMError(); + goto cleanup; + } + } else { + /* Check the URI starts with "tcp:". We will escape the + * URI when passing it to the qemu monitor, so bad + * characters in hostname part don't matter. + */ + if (!STRPREFIX (uri_in, "tcp:")) { + qemuReportError (VIR_ERR_INVALID_ARG, + "%s", _("only tcp URIs are supported for KVM/QEMU migrations")); + goto cleanup; + } + + /* Get the port number. */ + p = strrchr (uri_in, ':'); + if (p == strchr(uri_in, ':')) { + /* Generate a port */ + this_port = QEMUD_MIGRATION_FIRST_PORT + port++; + if (port == QEMUD_MIGRATION_NUM_PORTS) + port = 0; + + /* Caller frees */ + if (virAsprintf(uri_out, "%s:%d", uri_in, this_port) < 0) { + virReportOOMError(); + goto cleanup; + } + + } else { + p++; /* definitely has a ':' in it, see above */ + this_port = virParseNumber (&p); + if (this_port == -1 || p-uri_in != strlen (uri_in)) { + qemuReportError(VIR_ERR_INVALID_ARG, + "%s", _("URI ended with incorrect ':port'")); + goto cleanup; + } + } + } + + if (*uri_out) + VIR_DEBUG("Generated uri_out=%s", *uri_out); + + /* Parse the domain XML.
*/ + if (!(def = virDomainDefParseString(driver->caps, dom_xml, + VIR_DOMAIN_XML_INACTIVE))) { + qemuReportError(VIR_ERR_OPERATION_FAILED, + "%s", _("failed to parse XML")); + goto cleanup; + } + + if (!qemuMigrationIsAllowed(def)) + goto cleanup; + + /* Target domain name, maybe renamed. */ + if (dname) { + VIR_FREE(def->name); + def->name = strdup(dname); + if (def->name == NULL) + goto cleanup; + } + + if (virDomainObjIsDuplicate(&driver->domains, def, 1) < 0) + goto cleanup; + + if (!(vm = virDomainAssignDef(driver->caps, + &driver->domains, + def, true))) { + /* virDomainAssignDef already set the error */ + goto cleanup; + } + def = NULL; + priv = vm->privateData; + + if (qemuDomainObjBeginJobWithDriver(driver, vm) < 0) + goto cleanup; + priv->jobActive = QEMU_JOB_MIGRATION_OUT; + + /* Domain starts inactive, even if the domain XML had an id field. */ + vm->def->id = -1; + + /* Start the QEMU daemon, with the same command-line arguments plus + * -incoming tcp:0.0.0.0:port + */ + snprintf (migrateFrom, sizeof (migrateFrom), "tcp:0.0.0.0:%d", this_port); + if (qemuProcessStart(dconn, driver, vm, migrateFrom, true, + -1, NULL, VIR_VM_OP_MIGRATE_IN_START) < 0) { + qemuDomainStartAudit(vm, "migrated", false); + /* Note that we don't set an error here because qemuProcessStart + * should have already done that. + */ + if (!vm->persistent) { + if (qemuDomainObjEndJob(vm) > 0) + virDomainRemoveInactive(&driver->domains, vm); + vm = NULL; + } + goto endjob; + } + + qemuDomainStartAudit(vm, "migrated", true); + event = virDomainEventNewFromObj(vm, + VIR_DOMAIN_EVENT_STARTED, + VIR_DOMAIN_EVENT_STARTED_MIGRATED); + ret = 0; + +endjob: + if (vm && + qemuDomainObjEndJob(vm) == 0) + vm = NULL; + + /* We set a fake job active which is held across + * API calls until the finish() call. This prevents + * any other APIs being invoked while incoming + * migration is taking place + */ + if (vm && + virDomainObjIsActive(vm)) { + priv->jobActive = QEMU_JOB_MIGRATION_IN; + priv->jobInfo.type = VIR_DOMAIN_JOB_UNBOUNDED; + priv->jobStart = timeval_to_ms(now); + } + +cleanup: + VIR_FREE(hostname); + virDomainDefFree(def); + if (ret != 0) + VIR_FREE(*uri_out); + if (vm) + virDomainObjUnlock(vm); + if (event) + qemuDomainEventQueue(driver, event); + return ret; +} + + +/* Perform migration using QEMU's native TCP migrate support, + * not encrypted obviously + */ +static int doNativeMigrate(struct qemud_driver *driver, + virDomainObjPtr vm, + const char *uri, + unsigned int flags, + const char *dname ATTRIBUTE_UNUSED, + unsigned long resource) +{ + int ret = -1; + xmlURIPtr uribits = NULL; + qemuDomainObjPrivatePtr priv = vm->privateData; + unsigned int background_flags = QEMU_MONITOR_MIGRATE_BACKGROUND; + + /* Issue the migrate command. 
*/ + if (STRPREFIX(uri, "tcp:") && !STRPREFIX(uri, "tcp://")) { + /* HACK: source host generates bogus URIs, so fix them up */ + char *tmpuri; + if (virAsprintf(&tmpuri, "tcp://%s", uri + strlen("tcp:")) < 0) { + virReportOOMError(); + goto cleanup; + } + uribits = xmlParseURI(tmpuri); + VIR_FREE(tmpuri); + } else { + uribits = xmlParseURI(uri); + } + if (!uribits) { + qemuReportError(VIR_ERR_INTERNAL_ERROR, + _("cannot parse URI %s"), uri); + goto cleanup; + } + + qemuDomainObjEnterMonitorWithDriver(driver, vm); + if (resource > 0 && + qemuMonitorSetMigrationSpeed(priv->mon, resource) < 0) { + qemuDomainObjExitMonitorWithDriver(driver, vm); + goto cleanup; + } + + if (flags & VIR_MIGRATE_NON_SHARED_DISK) + background_flags |= QEMU_MONITOR_MIGRATE_NON_SHARED_DISK; + + if (flags & VIR_MIGRATE_NON_SHARED_INC) + background_flags |= QEMU_MONITOR_MIGRATE_NON_SHARED_INC; + + if (qemuMonitorMigrateToHost(priv->mon, background_flags, uribits->server, + uribits->port) < 0) { + qemuDomainObjExitMonitorWithDriver(driver, vm); + goto cleanup; + } + qemuDomainObjExitMonitorWithDriver(driver, vm); + + if (qemuMigrationWaitForCompletion(driver, vm) < 0) + goto cleanup; + + ret = 0; + +cleanup: + xmlFreeURI(uribits); + return ret; +} + + +#define TUNNEL_SEND_BUF_SIZE 65536 + +static int doTunnelSendAll(virStreamPtr st, + int sock) +{ + char *buffer; + int nbytes = TUNNEL_SEND_BUF_SIZE; + + if (VIR_ALLOC_N(buffer, TUNNEL_SEND_BUF_SIZE) < 0) { + virReportOOMError(); + virStreamAbort(st); + return -1; + } + + /* XXX should honour the 'resource' parameter here */ + for (;;) { + nbytes = saferead(sock, buffer, nbytes); + if (nbytes < 0) { + virReportSystemError(errno, "%s", + _("tunnelled migration failed to read from qemu")); + virStreamAbort(st); + VIR_FREE(buffer); + return -1; + } + else if (nbytes == 0) + /* EOF; get out of here */ + break; + + if (virStreamSend(st, buffer, nbytes) < 0) { + qemuReportError(VIR_ERR_OPERATION_FAILED, "%s", + _("Failed to write migration data to remote libvirtd")); + VIR_FREE(buffer); + return -1; + } + } + + VIR_FREE(buffer); + + if (virStreamFinish(st) < 0) + /* virStreamFinish set the error for us */ + return -1; + + return 0; +} + +static int doTunnelMigrate(struct qemud_driver *driver, + virConnectPtr dconn, + virDomainObjPtr vm, + const char *dom_xml, + const char *uri, + unsigned long flags, + const char *dname, + unsigned long resource) +{ + qemuDomainObjPrivatePtr priv = vm->privateData; + int client_sock = -1; + int qemu_sock = -1; + struct sockaddr_un sa_qemu, sa_client; + socklen_t addrlen; + virDomainPtr ddomain = NULL; + int retval = -1; + virStreamPtr st = NULL; + char *unixfile = NULL; + int internalret; + unsigned long long qemuCmdFlags; + int status; + unsigned long long transferred, remaining, total; + unsigned int background_flags = QEMU_MONITOR_MIGRATE_BACKGROUND; + + /* + * The order of operations is important here to avoid touching + * the source VM until we are very sure we can successfully + * start the migration operation. + * + * 1. setup local support infrastructure (eg sockets) + * 2. setup destination fully + * 3. start migration on source + */ + + + /* Stage 1. 
setup local support infrastructure */ + + if (virAsprintf(&unixfile, "%s/qemu.tunnelmigrate.src.%s", + driver->libDir, vm->def->name) < 0) { + virReportOOMError(); + goto cleanup; + } + + qemu_sock = socket(AF_UNIX, SOCK_STREAM, 0); + if (qemu_sock < 0) { + virReportSystemError(errno, "%s", + _("cannot open tunnelled migration socket")); + goto cleanup; + } + memset(&sa_qemu, 0, sizeof(sa_qemu)); + sa_qemu.sun_family = AF_UNIX; + if (virStrcpy(sa_qemu.sun_path, unixfile, + sizeof(sa_qemu.sun_path)) == NULL) { + qemuReportError(VIR_ERR_INTERNAL_ERROR, + _("Unix socket '%s' too big for destination"), + unixfile); + goto cleanup; + } + unlink(unixfile); + if (bind(qemu_sock, (struct sockaddr *)&sa_qemu, sizeof(sa_qemu)) < 0) { + virReportSystemError(errno, + _("Cannot bind to unix socket '%s' for tunnelled migration"), + unixfile); + goto cleanup; + } + if (listen(qemu_sock, 1) < 0) { + virReportSystemError(errno, + _("Cannot listen on unix socket '%s' for tunnelled migration"), + unixfile); + goto cleanup; + } + + if (chown(unixfile, driver->user, driver->group) < 0) { + virReportSystemError(errno, + _("Cannot change unix socket '%s' owner"), + unixfile); + goto cleanup; + } + + /* check that this qemu version supports the unix migration */ + if (qemuCapsExtractVersionInfo(vm->def->emulator, NULL, &qemuCmdFlags) < 0) { + qemuReportError(VIR_ERR_INTERNAL_ERROR, + _("Cannot extract Qemu version from '%s'"), + vm->def->emulator); + goto cleanup; + } + + if (!(qemuCmdFlags & QEMUD_CMD_FLAG_MIGRATE_QEMU_UNIX) && + !(qemuCmdFlags & QEMUD_CMD_FLAG_MIGRATE_QEMU_EXEC)) { + qemuReportError(VIR_ERR_OPERATION_FAILED, + "%s", _("Source qemu is too old to support tunnelled migration")); + goto cleanup; + } + + + /* Stage 2. setup destination fully + * + * Once stage 2 has completed successfully, we *must* call finish + * to cleanup the target whether we succeed or fail + */ + st = virStreamNew(dconn, 0); + if (st == NULL) + /* virStreamNew only fails on OOM, and it reports the error itself */ + goto cleanup; + + qemuDomainObjEnterRemoteWithDriver(driver, vm); + internalret = dconn->driver->domainMigratePrepareTunnel(dconn, st, + flags, dname, + resource, dom_xml); + qemuDomainObjExitRemoteWithDriver(driver, vm); + + if (internalret < 0) + /* domainMigratePrepareTunnel sets the error for us */ + goto cleanup; + + /* the domain may have shutdown or crashed while we had the locks dropped + * in qemuDomainObjEnterRemoteWithDriver, so check again + */ + if (!virDomainObjIsActive(vm)) { + qemuReportError(VIR_ERR_INTERNAL_ERROR, "%s", + _("guest unexpectedly quit")); + goto cleanup; + } + + /* 3. 
start migration on source */ + qemuDomainObjEnterMonitorWithDriver(driver, vm); + if (flags & VIR_MIGRATE_NON_SHARED_DISK) + background_flags |= QEMU_MONITOR_MIGRATE_NON_SHARED_DISK; + if (flags & VIR_MIGRATE_NON_SHARED_INC) + background_flags |= QEMU_MONITOR_MIGRATE_NON_SHARED_INC; + if (qemuCmdFlags & QEMUD_CMD_FLAG_MIGRATE_QEMU_UNIX){ + internalret = qemuMonitorMigrateToUnix(priv->mon, background_flags, + unixfile); + } + else if (qemuCmdFlags & QEMUD_CMD_FLAG_MIGRATE_QEMU_EXEC) { + const char *args[] = { "nc", "-U", unixfile, NULL }; + internalret = qemuMonitorMigrateToCommand(priv->mon, QEMU_MONITOR_MIGRATE_BACKGROUND, args); + } else { + internalret = -1; + } + qemuDomainObjExitMonitorWithDriver(driver, vm); + if (internalret < 0) { + qemuReportError(VIR_ERR_OPERATION_FAILED, + "%s", _("tunnelled migration monitor command failed")); + goto finish; + } + + if (!virDomainObjIsActive(vm)) { + qemuReportError(VIR_ERR_INTERNAL_ERROR, "%s", + _("guest unexpectedly quit")); + goto cleanup; + } + + /* From this point onwards we *must* call cancel to abort the + * migration on source if anything goes wrong */ + + /* it is also possible that the migrate didn't fail initially, but + * rather failed later on. Check the output of "info migrate" + */ + qemuDomainObjEnterMonitorWithDriver(driver, vm); + if (qemuMonitorGetMigrationStatus(priv->mon, + &status, + &transferred, + &remaining, + &total) < 0) { + qemuDomainObjExitMonitorWithDriver(driver, vm); + goto cancel; + } + qemuDomainObjExitMonitorWithDriver(driver, vm); + + if (status == QEMU_MONITOR_MIGRATION_STATUS_ERROR) { + qemuReportError(VIR_ERR_OPERATION_FAILED, + "%s",_("migrate failed")); + goto cancel; + } + + addrlen = sizeof(sa_client); + while ((client_sock = accept(qemu_sock, (struct sockaddr *)&sa_client, &addrlen)) < 0) { + if (errno == EAGAIN || errno == EINTR) + continue; + virReportSystemError(errno, "%s", + _("tunnelled migration failed to accept from qemu")); + goto cancel; + } + + retval = doTunnelSendAll(st, client_sock); + +cancel: + if (retval != 0 && virDomainObjIsActive(vm)) { + qemuDomainObjEnterMonitorWithDriver(driver, vm); + qemuMonitorMigrateCancel(priv->mon); + qemuDomainObjExitMonitorWithDriver(driver, vm); + } + +finish: + dname = dname ? 
dname : vm->def->name; + qemuDomainObjEnterRemoteWithDriver(driver, vm); + ddomain = dconn->driver->domainMigrateFinish2 + (dconn, dname, NULL, 0, uri, flags, retval); + qemuDomainObjExitRemoteWithDriver(driver, vm); + +cleanup: + VIR_FORCE_CLOSE(client_sock); + VIR_FORCE_CLOSE(qemu_sock); + + if (ddomain) + virUnrefDomain(ddomain); + + if (unixfile) { + unlink(unixfile); + VIR_FREE(unixfile); + } + + if (st) + /* don't call virStreamFree(), because that resets any pending errors */ + virUnrefStream(st); + return retval; +} + + +/* This is essentially a simplified re-impl of + * virDomainMigrateVersion2 from libvirt.c, but running in source + * libvirtd context, instead of client app context */ +static int doNonTunnelMigrate(struct qemud_driver *driver, + virConnectPtr dconn, + virDomainObjPtr vm, + const char *dom_xml, + const char *uri ATTRIBUTE_UNUSED, + unsigned long flags, + const char *dname, + unsigned long resource) +{ + virDomainPtr ddomain = NULL; + int retval = -1; + char *uri_out = NULL; + int rc; + + qemuDomainObjEnterRemoteWithDriver(driver, vm); + /* NB we don't pass 'uri' into this, since that's the libvirtd + * URI in this context - so we let dest pick it */ + rc = dconn->driver->domainMigratePrepare2(dconn, + NULL, /* cookie */ + 0, /* cookielen */ + NULL, /* uri */ + &uri_out, + flags, dname, + resource, dom_xml); + qemuDomainObjExitRemoteWithDriver(driver, vm); + if (rc < 0) + /* domainMigratePrepare2 sets the error for us */ + goto cleanup; + + /* the domain may have shutdown or crashed while we had the locks dropped + * in qemuDomainObjEnterRemoteWithDriver, so check again + */ + if (!virDomainObjIsActive(vm)) { + qemuReportError(VIR_ERR_INTERNAL_ERROR, "%s", + _("guest unexpectedly quit")); + goto cleanup; + } + + if (uri_out == NULL) { + qemuReportError(VIR_ERR_INTERNAL_ERROR, "%s", + _("domainMigratePrepare2 did not set uri")); + goto cleanup; + } + + if (doNativeMigrate(driver, vm, uri_out, flags, dname, resource) < 0) + goto finish; + + retval = 0; + +finish: + dname = dname ? 
dname : vm->def->name; + qemuDomainObjEnterRemoteWithDriver(driver, vm); + ddomain = dconn->driver->domainMigrateFinish2 + (dconn, dname, NULL, 0, uri_out, flags, retval); + qemuDomainObjExitRemoteWithDriver(driver, vm); + + if (ddomain) + virUnrefDomain(ddomain); + +cleanup: + return retval; +} + + +static int doPeer2PeerMigrate(struct qemud_driver *driver, + virDomainObjPtr vm, + const char *uri, + unsigned long flags, + const char *dname, + unsigned long resource) +{ + int ret = -1; + virConnectPtr dconn = NULL; + char *dom_xml; + bool p2p; + + /* the order of operations is important here; we make sure the + * destination side is completely setup before we touch the source + */ + + qemuDomainObjEnterRemoteWithDriver(driver, vm); + dconn = virConnectOpen(uri); + qemuDomainObjExitRemoteWithDriver(driver, vm); + if (dconn == NULL) { + qemuReportError(VIR_ERR_OPERATION_FAILED, + _("Failed to connect to remote libvirt URI %s"), uri); + return -1; + } + + qemuDomainObjEnterRemoteWithDriver(driver, vm); + p2p = VIR_DRV_SUPPORTS_FEATURE(dconn->driver, dconn, + VIR_DRV_FEATURE_MIGRATION_P2P); + qemuDomainObjExitRemoteWithDriver(driver, vm); + if (!p2p) { + qemuReportError(VIR_ERR_OPERATION_FAILED, "%s", + _("Destination libvirt does not support peer-to-peer migration protocol")); + goto cleanup; + } + + /* domain may have been stopped while we were talking to remote daemon */ + if (!virDomainObjIsActive(vm)) { + qemuReportError(VIR_ERR_INTERNAL_ERROR, "%s", + _("guest unexpectedly quit")); + goto cleanup; + } + + dom_xml = qemuDomainFormatXML(driver, vm, + VIR_DOMAIN_XML_SECURE | + VIR_DOMAIN_XML_UPDATE_CPU); + if (!dom_xml) { + qemuReportError(VIR_ERR_OPERATION_FAILED, + "%s", _("failed to get domain xml")); + goto cleanup; + } + + if (flags & VIR_MIGRATE_TUNNELLED) + ret = doTunnelMigrate(driver, dconn, vm, dom_xml, uri, flags, dname, resource); + else + ret = doNonTunnelMigrate(driver, dconn, vm, dom_xml, uri, flags, dname, resource); + +cleanup: + VIR_FREE(dom_xml); + /* don't call virConnectClose(), because that resets any pending errors */ + qemuDomainObjEnterRemoteWithDriver(driver, vm); + virUnrefConnect(dconn); + qemuDomainObjExitRemoteWithDriver(driver, vm); + + return ret; +} + + +int qemuMigrationPerform(struct qemud_driver *driver, + virConnectPtr conn, + virDomainObjPtr vm, + const char *uri, + unsigned long flags, + const char *dname, + unsigned long resource) +{ + virDomainEventPtr event = NULL; + int ret = -1; + int resume = 0; + qemuDomainObjPrivatePtr priv = vm->privateData; + + if (qemuDomainObjBeginJobWithDriver(driver, vm) < 0) + goto cleanup; + priv->jobActive = QEMU_JOB_MIGRATION_OUT; + + if (!virDomainObjIsActive(vm)) { + qemuReportError(VIR_ERR_OPERATION_INVALID, + "%s", _("domain is not running")); + goto endjob; + } + + memset(&priv->jobInfo, 0, sizeof(priv->jobInfo)); + priv->jobInfo.type = VIR_DOMAIN_JOB_UNBOUNDED; + + resume = vm->state == VIR_DOMAIN_RUNNING; + if (!(flags & VIR_MIGRATE_LIVE) && vm->state == VIR_DOMAIN_RUNNING) { + if (qemuMigrationSetOffline(driver, vm) < 0) + goto endjob; + } + + if ((flags & (VIR_MIGRATE_TUNNELLED | VIR_MIGRATE_PEER2PEER))) { + if (doPeer2PeerMigrate(driver, vm, uri, flags, dname, resource) < 0) + /* doPeer2PeerMigrate already set the error, so just get out */ + goto endjob; + } else { + if (doNativeMigrate(driver, vm, uri, flags, dname, resource) < 0) + goto endjob; + } + + /* Clean up the source domain. 
*/ + qemuProcessStop(driver, vm, 1); + qemuDomainStopAudit(vm, "migrated"); + resume = 0; + + event = virDomainEventNewFromObj(vm, + VIR_DOMAIN_EVENT_STOPPED, + VIR_DOMAIN_EVENT_STOPPED_MIGRATED); + if (!vm->persistent || (flags & VIR_MIGRATE_UNDEFINE_SOURCE)) { + virDomainDeleteConfig(driver->configDir, driver->autostartDir, vm); + if (qemuDomainObjEndJob(vm) > 0) + virDomainRemoveInactive(&driver->domains, vm); + vm = NULL; + } + ret = 0; + +endjob: + if (resume && vm->state == VIR_DOMAIN_PAUSED) { + /* we got here through some sort of failure; start the domain again */ + if (qemuProcessStartCPUs(driver, vm, conn) < 0) { + /* Hm, we already know we are in error here. We don't want to + * overwrite the previous error, though, so we just throw something + * to the logs and hope for the best + */ + VIR_ERROR(_("Failed to resume guest %s after failure"), + vm->def->name); + } + + event = virDomainEventNewFromObj(vm, + VIR_DOMAIN_EVENT_RESUMED, + VIR_DOMAIN_EVENT_RESUMED_MIGRATED); + } + if (vm && + qemuDomainObjEndJob(vm) == 0) + vm = NULL; + +cleanup: + if (vm) + virDomainObjUnlock(vm); + if (event) + qemuDomainEventQueue(driver, event); + return ret; +} + + +#if WITH_MACVTAP +static void +qemuMigrationVPAssociatePortProfiles(virDomainDefPtr def) { + int i; + int last_good_net = -1; + virDomainNetDefPtr net; + + for (i = 0; i < def->nnets; i++) { + net = def->nets[i]; + if (net->type == VIR_DOMAIN_NET_TYPE_DIRECT) { + if (vpAssociatePortProfileId(net->ifname, + net->mac, + net->data.direct.linkdev, + &net->data.direct.virtPortProfile, + def->uuid, + VIR_VM_OP_MIGRATE_IN_FINISH) != 0) + goto err_exit; + } + last_good_net = i; + } + + return; + +err_exit: + for (i = 0; i <= last_good_net; i++) { + net = def->nets[i]; + if (net->type == VIR_DOMAIN_NET_TYPE_DIRECT) { + vpDisassociatePortProfileId(net->ifname, + net->mac, + net->data.direct.linkdev, + &net->data.direct.virtPortProfile, + VIR_VM_OP_MIGRATE_IN_FINISH); + } + } +} +#else /* !WITH_MACVTAP */ +static void +qemuMigrationVPAssociatePortProfiles(virDomainDefPtr def ATTRIBUTE_UNUSED) { } +#endif /* WITH_MACVTAP */ + + +virDomainPtr +qemuMigrationFinish(struct qemud_driver *driver, + virConnectPtr dconn, + virDomainObjPtr vm, + unsigned long flags, + int retcode) +{ + virDomainPtr dom = NULL; + virDomainEventPtr event = NULL; + int newVM = 1; + qemuDomainObjPrivatePtr priv = NULL; + + priv = vm->privateData; + if (priv->jobActive != QEMU_JOB_MIGRATION_IN) { + qemuReportError(VIR_ERR_NO_DOMAIN, + _("domain '%s' is not processing incoming migration"), vm->def->name); + goto cleanup; + } + priv->jobActive = QEMU_JOB_NONE; + memset(&priv->jobInfo, 0, sizeof(priv->jobInfo)); + + if (qemuDomainObjBeginJobWithDriver(driver, vm) < 0) + goto cleanup; + + /* Did the migration go as planned? If yes, return the domain + * object, but if no, clean up the empty qemu process. + */ + if (retcode == 0) { + if (!virDomainObjIsActive(vm)) { + qemuReportError(VIR_ERR_INTERNAL_ERROR, "%s", + _("guest unexpectedly quit")); + goto cleanup; + } + + qemuMigrationVPAssociatePortProfiles(vm->def); + + if (flags & VIR_MIGRATE_PERSIST_DEST) { + if (vm->persistent) + newVM = 0; + vm->persistent = 1; + + if (virDomainSaveConfig(driver->configDir, vm->def) < 0) { + /* Hmpf. Migration was successful, but making it persistent + * was not. If we report successful, then when this domain + * shuts down, management tools are in for a surprise.
On the + * other hand, if we report failure, then the management tools + * might try to restart the domain on the source side, even + * though the domain is actually running on the destination. + * Return a NULL dom pointer, and hope that this is a rare + * situation and management tools are smart. + */ + vm = NULL; + goto endjob; + } + + event = virDomainEventNewFromObj(vm, + VIR_DOMAIN_EVENT_DEFINED, + newVM ? + VIR_DOMAIN_EVENT_DEFINED_ADDED : + VIR_DOMAIN_EVENT_DEFINED_UPDATED); + if (event) + qemuDomainEventQueue(driver, event); + event = NULL; + + } + dom = virGetDomain (dconn, vm->def->name, vm->def->uuid); + + if (!(flags & VIR_MIGRATE_PAUSED)) { + /* run 'cont' on the destination, which allows migration on qemu + * >= 0.10.6 to work properly. This isn't strictly necessary on + * older qemu's, but it also doesn't hurt anything there + */ + if (qemuProcessStartCPUs(driver, vm, dconn) < 0) { + if (virGetLastError() == NULL) + qemuReportError(VIR_ERR_INTERNAL_ERROR, + "%s", _("resume operation failed")); + goto endjob; + } + } + + event = virDomainEventNewFromObj(vm, + VIR_DOMAIN_EVENT_RESUMED, + VIR_DOMAIN_EVENT_RESUMED_MIGRATED); + if (vm->state == VIR_DOMAIN_PAUSED) { + qemuDomainEventQueue(driver, event); + event = virDomainEventNewFromObj(vm, + VIR_DOMAIN_EVENT_SUSPENDED, + VIR_DOMAIN_EVENT_SUSPENDED_PAUSED); + } + if (virDomainSaveStatus(driver->caps, driver->stateDir, vm) < 0) { + VIR_WARN("Failed to save status on vm %s", vm->def->name); + goto endjob; + } + } else { + qemuProcessStop(driver, vm, 1); + qemuDomainStopAudit(vm, "failed"); + event = virDomainEventNewFromObj(vm, + VIR_DOMAIN_EVENT_STOPPED, + VIR_DOMAIN_EVENT_STOPPED_FAILED); + if (!vm->persistent) { + if (qemuDomainObjEndJob(vm) > 0) + virDomainRemoveInactive(&driver->domains, vm); + vm = NULL; + } + } + +endjob: + if (vm && + qemuDomainObjEndJob(vm) == 0) + vm = NULL; + +cleanup: + if (vm) + virDomainObjUnlock(vm); + if (event) + qemuDomainEventQueue(driver, event); + return dom; +} diff --git a/src/qemu/qemu_migration.h b/src/qemu/qemu_migration.h new file mode 100644 index 0000000..3cac617 --- /dev/null +++ b/src/qemu/qemu_migration.h @@ -0,0 +1,63 @@ +/* + * qemu_migration.h: QEMU migration handling + * + * Copyright (C) 2006-2011 Red Hat, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#ifndef __QEMU_MIGRATION_H__ +# define __QEMU_MIGRATION_H__ + +# include "qemu_conf.h" + + +bool qemuMigrationIsAllowed(virDomainDefPtr def) + ATTRIBUTE_NONNULL(1); +int qemuMigrationSetOffline(struct qemud_driver *driver, + virDomainObjPtr vm); + +int qemuMigrationWaitForCompletion(struct qemud_driver *driver, virDomainObjPtr vm); + +int qemuMigrationPrepareTunnel(struct qemud_driver *driver, + virConnectPtr dconn, + virStreamPtr st, + const char *dname, + const char *dom_xml); + +int qemuMigrationPrepareDirect(struct qemud_driver *driver, + virConnectPtr dconn, + const char *uri_in, + char **uri_out, + const char *dname, + const char *dom_xml); + +int qemuMigrationPerform(struct qemud_driver *driver, + virConnectPtr conn, + virDomainObjPtr vm, + const char *uri, + unsigned long flags, + const char *dname, + unsigned long resource); + +virDomainPtr qemuMigrationFinish(struct qemud_driver *driver, + virConnectPtr dconn, + virDomainObjPtr vm, + unsigned long flags, + int retcode); + + +#endif /* __QEMU_MIGRATION_H__ */ -- 1.7.4
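
The driver-side entry points that remain in qemu_driver.c now reduce to lookup-plus-delegate. As a rough sketch of that shape (illustrative only, simplified; not the exact code the patch leaves behind in qemu_driver.c):

static int
qemudDomainMigratePerform(virDomainPtr dom,
                          const char *cookie ATTRIBUTE_UNUSED,
                          int cookielen ATTRIBUTE_UNUSED,
                          const char *uri,
                          unsigned long flags,
                          const char *dname,
                          unsigned long resource)
{
    struct qemud_driver *driver = dom->conn->privateData;
    virDomainObjPtr vm;
    int ret = -1;

    qemuDriverLock(driver);
    /* Resolve the public virDomainPtr to the internal object... */
    if (!(vm = virDomainFindByUUID(&driver->domains, dom->uuid))) {
        qemuReportError(VIR_ERR_NO_DOMAIN, "%s",
                        _("no domain with matching uuid"));
        goto cleanup;
    }

    /* ...then everything from here on lives in qemu_migration.c;
     * qemuMigrationPerform takes ownership of 'vm' and unlocks it */
    ret = qemuMigrationPerform(driver, dom->conn, vm,
                               uri, flags, dname, resource);

cleanup:
    qemuDriverUnlock(driver);
    return ret;
}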

On 02/09/2011 09:58 AM, Daniel P. Berrange wrote:
The introduction of the v3 migration protocol, along with support for migration cookies, will significantly expand the size of the migration code. Move it all to a separate file to make it more manageable
The functions are not moved 100%. The API entry points remain in the main QEMU driver, but once the public virDomainPtr is resolved to the internal virDomainObjPtr, all following code is moved.
This will allow the new v3 API entry points to call into the same shared internal migration functions
* src/qemu/qemu_domain.c, src/qemu/qemu_domain.h: Add qemuDomainFormatXML helper method * src/qemu/qemu_driver.c: Remove all migration code * src/qemu/qemu_migration.c, src/qemu/qemu_migration.h: Add all migration code. --- po/POTFILES.in | 1 + src/Makefile.am | 3 +- src/qemu/qemu_domain.c | 39 ++ src/qemu/qemu_domain.h | 4 + src/qemu/qemu_driver.c | 1297 ++------------------------------------------- src/qemu/qemu_migration.c | 1295 ++++++++++++++++++++++++++++++++++++++++++++ src/qemu/qemu_migration.h | 63 +++ 7 files changed, 1445 insertions(+), 1257 deletions(-) create mode 100644 src/qemu/qemu_migration.c create mode 100644 src/qemu/qemu_migration.h
You fixed my concerns from v1; however, you missed that commit ee3b030 has in the meantime changed the code that needed moving.
+ * This version starts an empty VM listening on a localhost TCP port, and + * sets up the corresponding virStream to handle the incoming data. + */ +int +qemuMigrationPrepareTunnel(struct qemud_driver *driver, + virConnectPtr dconn, + virStreamPtr st, + const char *dname, + const char *dom_xml) +{
+ /* Parse the domain XML. */ + if (!(def = virDomainDefParseString(driver->caps, dom_xml, + VIR_DOMAIN_XML_INACTIVE))) { + qemuReportError(VIR_ERR_OPERATION_FAILED, + "%s", _("failed to parse XML")); + goto cleanup;
This needs to be:

     qemuReportError(VIR_ERR_OPERATION_FAILED,
-                    "%s", _("failed to parse XML"));
+                    "%s", _("failed to parse XML, libvirt version may be "
+                            "different between source and destination host"));

ACK with that nit fixed.

-- 
Eric Blake   eblake@redhat.com    +1-801-349-2682
Libvirt virtualization library http://libvirt.org

On Wed, Feb 09, 2011 at 01:20:39PM -0700, Eric Blake wrote:
On 02/09/2011 09:58 AM, Daniel P. Berrange wrote:
The introduction of the v3 migration protocol, along with support for migration cookies, will significantly expand the size of the migration code. Move it all to a separate file to make it more manageable
The functions are not moved 100%. The API entry points remain in the main QEMU driver, but once the public virDomainPtr is resolved to the internal virDomainObjPtr, all following code is moved.
This will allow the new v3 API entry points to call into the same shared internal migration functions
* src/qemu/qemu_domain.c, src/qemu/qemu_domain.h: Add qemuDomainFormatXML helper method * src/qemu/qemu_driver.c: Remove all migration code * src/qemu/qemu_migration.c, src/qemu/qemu_migration.h: Add all migration code. --- po/POTFILES.in | 1 + src/Makefile.am | 3 +- src/qemu/qemu_domain.c | 39 ++ src/qemu/qemu_domain.h | 4 + src/qemu/qemu_driver.c | 1297 ++------------------------------------------- src/qemu/qemu_migration.c | 1295 ++++++++++++++++++++++++++++++++++++++++++++ src/qemu/qemu_migration.h | 63 +++ 7 files changed, 1445 insertions(+), 1257 deletions(-) create mode 100644 src/qemu/qemu_migration.c create mode 100644 src/qemu/qemu_migration.h
You fixed my concerns from v1; however, you missed that commit ee3b030 has in the meantime changed the code that needed moving.
+ * This version starts an empty VM listening on a localhost TCP port, and + * sets up the corresponding virStream to handle the incoming data. + */ +int +qemuMigrationPrepareTunnel(struct qemud_driver *driver, + virConnectPtr dconn, + virStreamPtr st, + const char *dname, + const char *dom_xml) +{
+ /* Parse the domain XML. */ + if (!(def = virDomainDefParseString(driver->caps, dom_xml, + VIR_DOMAIN_XML_INACTIVE))) { + qemuReportError(VIR_ERR_OPERATION_FAILED, + "%s", _("failed to parse XML")); + goto cleanup;
This needs to be:
     qemuReportError(VIR_ERR_OPERATION_FAILED,
-                    "%s", _("failed to parse XML"));
+                    "%s", _("failed to parse XML, libvirt version may be "
+                            "different between source and destination host"));
ACK with that nit fixed.
Actually that change should not have been included in the first place. The entire line should have been deleted, as per:

http://www.redhat.com/archives/libvir-list/2011-January/msg01271.html

I'm deleting it in my series.

Daniel
-- 
|: http://berrange.com -o- http://www.flickr.com/photos/dberrange/ :|
|: http://libvirt.org -o- http://virt-manager.org :|
|: http://autobuild.org -o- http://search.cpan.org/~danberr/ :|
|: http://entangle-photo.org -o- http://live.gnome.org/gtk-vnc :|

Migration just seems to go from bad to worse. We already had to introduce a second migration protocol when adding the QEMU driver, since the one from Xen was insufficiently flexible to cope with passing the data the QEMU driver required. It turns out that this protocol still has some flaws that we need to address. The current sequence is

 * Src: DumpXML
   - Generate XML to pass to dst

 * Dst: Prepare
   - Get ready to accept incoming VM
   - Generate optional cookie to pass to src

 * Src: Perform
   - Start migration and wait for send completion
   - Kill off VM if successful, resume if failed

 * Dst: Finish
   - Wait for recv completion and check status
   - Kill off VM if unsuccessful

The problems with this are:

 - Since the first step is a generic 'DumpXML' call, we can't add in other migration-specific data; e.g. we can't include any VM lease data from lock manager plugins

 - Since the first step is a generic 'DumpXML' call, we can't emit any 'migration begin' event on the source, or have any hook that runs right at the start of the process

 - Since there is no final step on the source, if the Finish method fails to receive all migration data & has to kill the VM, then there's no way to resume the original VM on the source

This patch attempts to introduce a version 3 that uses the improved 5-step sequence

 * Src: Begin
   - Generate XML to pass to dst
   - Generate optional cookie to pass to dst

 * Dst: Prepare
   - Get ready to accept incoming VM
   - Generate optional cookie to pass to src

 * Src: Perform
   - Start migration and wait for send completion
   - Generate optional cookie to pass to dst

 * Dst: Finish
   - Wait for recv completion and check status
   - Kill off VM if failed, resume if success
   - Generate optional cookie to pass to src

 * Src: Confirm
   - Kill off VM if success, resume if failed

The API is designed to allow both input and output cookies in all methods where applicable. This lets us pass around arbitrary extra driver-specific data between src & dst during migration. Combined with the extra 'Begin' method, this lets us pass lease information from source to dst at the start of migration.

Moving the killing of the source VM out of Perform and into Confirm means we can now recover if the dst host can't successfully Finish receiving migration data.
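
To make the failure handling concrete, here is a small self-contained sketch (plain C; the function names are invented stand-ins for the five driver calls, not the APIs added below) of the control flow this gives us. Each phase's output cookie becomes the next phase's input cookie, and the fate of the source VM is only decided once Confirm runs:

#include <stdio.h>

/* Invented stand-ins for the five phases; 0 on success, -1 on failure.
 * Each may emit a cookie that is fed into the following phase. */
static int begin(const char **out) { *out = "src-cookie"; return 0; }
static int prepare(const char *in, const char **out)
{ (void)in; *out = "dst-cookie"; return 0; }
static int perform(const char *in, const char **out)
{ (void)in; *out = "src-cookie2"; return 0; }
static int finish(const char *in, const char **out,
                  int cancelled, int *vm_on_dst)
{ (void)in; *out = "dst-cookie2"; *vm_on_dst = !cancelled; return 0; }
static int confirm(const char *in, int restart)
{ (void)in; (void)restart; return 0; }

int main(void)
{
    const char *cookie = NULL;
    int vm_on_dst = 0;
    int cancelled, rc;

    if (begin(&cookie) < 0)               /* Src: Begin */
        return 1;
    if (prepare(cookie, &cookie) < 0)     /* Dst: Prepare */
        return 1;

    /* Src: Perform - on failure, tell the destination to cancel,
     * but do not kill the source VM yet */
    cancelled = perform(cookie, &cookie) < 0;

    /* Dst: Finish - kill the dst VM if cancelled, else resume it */
    rc = finish(cookie, &cookie, cancelled, &vm_on_dst);

    /* Src: Confirm - restart the source only when Finish definitely
     * reported no VM running on the dst; if Finish itself failed we
     * cannot tell, so the safe option is to kill the source VM */
    confirm(cookie, rc == 0 && !vm_on_dst);

    printf("source VM %s\n",
           (rc == 0 && !vm_on_dst) ? "resumed" : "killed");
    return 0;
}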
--- src/driver.h | 77 ++++++- src/esx/esx_driver.c | 6 + src/libvirt.c | 555 ++++++++++++++++++++++++++++++++++++++++++- src/libvirt_internal.h | 66 +++++ src/libvirt_private.syms | 6 + src/lxc/lxc_driver.c | 6 + src/opennebula/one_driver.c | 6 + src/openvz/openvz_driver.c | 6 + src/phyp/phyp_driver.c | 6 + src/qemu/qemu_driver.c | 6 + src/remote/remote_driver.c | 6 + src/test/test_driver.c | 6 + src/uml/uml_driver.c | 6 + src/vbox/vbox_tmpl.c | 6 + src/vmware/vmware_driver.c | 6 + src/xen/xen_driver.c | 6 + src/xenapi/xenapi_driver.c | 6 + 17 files changed, 773 insertions(+), 9 deletions(-) diff --git a/src/driver.h b/src/driver.h index 7451004..27df9d0 100644 --- a/src/driver.h +++ b/src/driver.h @@ -387,7 +387,7 @@ typedef int typedef int (*virDrvDomainMigratePrepareTunnel) - (virConnectPtr conn, + (virConnectPtr dconn, virStreamPtr st, unsigned long flags, const char *dname, @@ -495,6 +495,75 @@ typedef int virStreamPtr st, unsigned int flags); +typedef char * + (*virDrvDomainMigrateBegin3) + (virDomainPtr domain, + char **cookieout, + int *cookieoutlen, + unsigned long flags, + const char *dname, + unsigned long resource); + +typedef int + (*virDrvDomainMigratePrepare3) + (virConnectPtr dconn, + const char *cookiein, + int cookieinlen, + char **cookieout, + int *cookieoutlen, + const char *uri_in, + char **uri_out, + unsigned long flags, + const char *dname, + unsigned long resource, + const char *dom_xml); + +typedef int + (*virDrvDomainMigratePrepareTunnel3) + (virConnectPtr dconn, + virStreamPtr st, + const char *cookiein, + int cookieinlen, + char **cookieout, + int *cookieoutlen, + unsigned long flags, + const char *dname, + unsigned long resource, + const char *dom_xml); + + +typedef int + (*virDrvDomainMigratePerform3) + (virDomainPtr dom, + const char *cookiein, + int cookieinlen, + char **cookieout, + int *cookieoutlen, + const char *uri, + unsigned long flags, + const char *dname, + unsigned long resource); + +typedef int + (*virDrvDomainMigrateFinish3) + (virConnectPtr dconn, + const char *dname, + const char *cookiein, + int cookieinlen, + char **cookieout, + int *cookieoutlen, + const char *uri, + unsigned long flags, + int cancelled, + virDomainPtr *newdom); + +typedef int + (*virDrvDomainMigrateConfirm3) + (virDomainPtr domain, + const char *cookiein, + int cookieinlen, + unsigned long flags, + int cancelled); /** * _virDriver: @@ -615,6 +684,12 @@ struct _virDriver { virDrvDomainSetMemoryParameters domainSetMemoryParameters; virDrvDomainGetMemoryParameters domainGetMemoryParameters; virDrvDomainOpenConsole domainOpenConsole; + virDrvDomainMigrateBegin3 domainMigrateBegin3; + virDrvDomainMigratePrepare3 domainMigratePrepare3; + virDrvDomainMigratePrepareTunnel3 domainMigratePrepareTunnel3; + virDrvDomainMigratePerform3 domainMigratePerform3; + virDrvDomainMigrateFinish3 domainMigrateFinish3; + virDrvDomainMigrateConfirm3 domainMigrateConfirm3; }; typedef int diff --git a/src/esx/esx_driver.c b/src/esx/esx_driver.c index 97f3dbe..2eac1f6 100644 --- a/src/esx/esx_driver.c +++ b/src/esx/esx_driver.c @@ -4654,6 +4654,12 @@ static virDriver esxDriver = { esxDomainSetMemoryParameters, /* domainSetMemoryParameters */ esxDomainGetMemoryParameters, /* domainGetMemoryParameters */ NULL, /* domainOpenConsole */ + NULL, /* domainMigrateBegin3 */ + NULL, /* domainMigratePrepare3 */ + NULL, /* domainMigratePrepareTunnel3 */ + NULL, /* domainMigratePerform3 */ + NULL, /* domainMigrateFinish3 */ + NULL, /* domainMigrateConfirm3 */ }; diff --git a/src/libvirt.c b/src/libvirt.c index 
479a9b5..5d6e3a7 100644 --- a/src/libvirt.c +++ b/src/libvirt.c @@ -3177,6 +3177,22 @@ error: } +/* + * Sequence v1: + * + * Dst: Prepare + * - Get ready to accept incoming VM + * - Generate optional cookie to pass to src + * + * Src: Perform + * - Start migration and wait for send completion + * - Kill off VM if successful, resume if failed + * + * Dst: Finish + * - Wait for recv completion and check status + * - Kill off VM if unsuccessful + * + */ static virDomainPtr virDomainMigrateVersion1 (virDomainPtr domain, virConnectPtr dconn, @@ -3246,6 +3262,25 @@ virDomainMigrateVersion1 (virDomainPtr domain, return ddomain; } +/* + * Sequence v2: + * + * Src: DumpXML + * - Generate XML to pass to dst + * + * Dst: Prepare + * - Get ready to accept incoming VM + * - Generate optional cookie to pass to src + * + * Src: Perform + * - Start migration and wait for send completion + * - Kill off VM if successful, resume if failed + * + * Dst: Finish + * - Wait for recv completion and check status + * - Kill off VM if unsuccessful + * + */ static virDomainPtr virDomainMigrateVersion2 (virDomainPtr domain, virConnectPtr dconn, @@ -3294,6 +3329,7 @@ virDomainMigrateVersion2 (virDomainPtr domain, flags |= VIR_MIGRATE_PAUSED; } + VIR_DEBUG("Prepare2 %p", dconn); ret = dconn->driver->domainMigratePrepare2 (dconn, &cookie, &cookielen, uri, &uri_out, flags, dname, bandwidth, dom_xml); @@ -3314,6 +3350,7 @@ virDomainMigrateVersion2 (virDomainPtr domain, /* Perform the migration. The driver isn't supposed to return * until the migration is complete. */ + VIR_DEBUG("Perform %p", domain->conn); ret = domain->conn->driver->domainMigratePerform (domain, cookie, cookielen, uri, flags, dname, bandwidth); @@ -3326,6 +3363,7 @@ virDomainMigrateVersion2 (virDomainPtr domain, * so it can do any cleanup if the migration failed. */ dname = dname ? 
dname : domain->name; + VIR_DEBUG("Finish2 %p ret=%d", dconn, ret); ddomain = dconn->driver->domainMigrateFinish2 (dconn, dname, cookie, cookielen, uri, flags, ret); @@ -3340,6 +3378,174 @@ } + +/* + * Sequence v3: + * + * Src: Begin + * - Generate XML to pass to dst + * - Generate optional cookie to pass to dst + * + * Dst: Prepare + * - Get ready to accept incoming VM + * - Generate optional cookie to pass to src + * + * Src: Perform + * - Start migration and wait for send completion + * - Generate optional cookie to pass to dst + * + * Dst: Finish + * - Wait for recv completion and check status + * - Kill off VM if failed, resume if success + * - Generate optional cookie to pass to src + * + * Src: Confirm + * - Kill off VM if success, resume if failed + * + */ +static virDomainPtr +virDomainMigrateVersion3(virDomainPtr domain, + virConnectPtr dconn, + unsigned long flags, + const char *dname, + const char *uri, + unsigned long bandwidth) +{ + virDomainPtr ddomain = NULL; + char *uri_out = NULL; + char *cookiein = NULL; + char *cookieout = NULL; + char *dom_xml = NULL; + int cookieinlen = 0; + int cookieoutlen = 0; + int ret; + virDomainInfo info; + virErrorPtr orig_err = NULL; + int cancelled; + + if (!domain->conn->driver->domainMigrateBegin3 || + !domain->conn->driver->domainMigratePerform3 || + !domain->conn->driver->domainMigrateConfirm3 || + !dconn->driver->domainMigratePrepare3 || + !dconn->driver->domainMigrateFinish3) { + virLibConnError(VIR_ERR_INTERNAL_ERROR, __FUNCTION__); + virDispatchError(domain->conn); + return NULL; + } + + VIR_DEBUG("Begin3 %p", domain->conn); + dom_xml = domain->conn->driver->domainMigrateBegin3 + (domain, &cookieout, &cookieoutlen, flags, dname, + bandwidth); + if (!dom_xml) + goto done; + + ret = virDomainGetInfo (domain, &info); + if (ret == 0 && info.state == VIR_DOMAIN_PAUSED) { + flags |= VIR_MIGRATE_PAUSED; + } + + VIR_DEBUG("Prepare3 %p", dconn); + cookiein = cookieout; + cookieinlen = cookieoutlen; + cookieout = NULL; + cookieoutlen = 0; + ret = dconn->driver->domainMigratePrepare3 + (dconn, cookiein, cookieinlen, &cookieout, &cookieoutlen, + uri, &uri_out, flags, dname, bandwidth, dom_xml); + VIR_FREE (dom_xml); + if (ret == -1) + goto done; + + if (uri == NULL && uri_out == NULL) { + virLibConnError(VIR_ERR_INTERNAL_ERROR, + _("domainMigratePrepare3 did not set uri")); + virDispatchError(domain->conn); + goto done; + } + if (uri_out) + uri = uri_out; /* Did domainMigratePrepare3 change URI? */ + assert (uri != NULL); + + /* Perform the migration. The driver isn't supposed to return + * until the migration is complete. The src VM should remain + * running, but in paused state until the destination can + * confirm migration completion. + */ + VIR_DEBUG("Perform3 %p uri=%s", domain->conn, uri); + VIR_FREE(cookiein); + cookiein = cookieout; + cookieinlen = cookieoutlen; + cookieout = NULL; + cookieoutlen = 0; + ret = domain->conn->driver->domainMigratePerform3 + (domain, cookiein, cookieinlen, &cookieout, &cookieoutlen, + uri, flags, dname, bandwidth); + + /* Perform failed. Make sure Finish doesn't overwrite the error */ + if (ret < 0) + orig_err = virSaveLastError(); + + /* If Perform returns < 0, then we need to cancel the VM + * startup on the destination + */ + cancelled = ret < 0 ? 1 : 0; + + /* + * The status code from the source is passed to the destination. + * The dest can cleanup if the source indicated it failed to + * send all migration data.
Returns NULL for ddomain if + * the dest was unable to complete migration. + */ + VIR_DEBUG("Finish3 %p ret=%d", dconn, ret); + VIR_FREE(cookiein); + cookiein = cookieout; + cookieinlen = cookieoutlen; + cookieout = NULL; + cookieoutlen = 0; + dname = dname ? dname : domain->name; + ret = dconn->driver->domainMigrateFinish3 + (dconn, dname, cookiein, cookieinlen, &cookieout, &cookieoutlen, + uri, flags, cancelled, &ddomain); + + /* If ret is 0 then 'ddomain' indicates whether the VM is + * running on the dest. If not running, we can restart + * the source. If ret is -1, we can't be sure what happened + * to the VM on the dest, thus the only safe option is to + * kill the VM on the source, even though that may leave + * no VM at all on either host. + */ + cancelled = ret == 0 && ddomain == NULL ? 1 : 0; + + /* + * If cancelled, then src VM will be restarted, else + * it will be killed + */ + VIR_DEBUG("Confirm3 %p ret=%d domain=%p", domain->conn, ret, domain); + VIR_FREE(cookiein); + cookiein = cookieout; + cookieinlen = cookieoutlen; + cookieout = NULL; + cookieoutlen = 0; + ret = domain->conn->driver->domainMigrateConfirm3 + (domain, cookiein, cookieinlen, + flags, cancelled); + /* If Confirm3 returns -1, there's nothing more we can + * do, but fortunately worst case is that there is a + * domain left in 'paused' state on source. + */ + + done: + if (orig_err) { + virSetError(orig_err); + virFreeError(orig_err); + } + VIR_FREE(uri_out); + VIR_FREE(cookiein); + VIR_FREE(cookieout); + return ddomain; +} + + /* * This is sort of a migration v3 * @@ -3539,6 +3745,7 @@ virDomainMigrate (virDomainPtr domain, return NULL; } + VIR_DEBUG0("Using peer2peer migration"); if (virDomainMigratePeer2Peer(domain, flags, dname, uri ? uri : dstURI, bandwidth) < 0) { VIR_FREE(dstURI); goto error; @@ -3560,16 +3767,24 @@ virDomainMigrate (virDomainPtr domain, /* Check that migration is supported by both drivers. */ if (VIR_DRV_SUPPORTS_FEATURE(domain->conn->driver, domain->conn, - VIR_DRV_FEATURE_MIGRATION_V1) && + VIR_DRV_FEATURE_MIGRATION_V3) && VIR_DRV_SUPPORTS_FEATURE(dconn->driver, dconn, - VIR_DRV_FEATURE_MIGRATION_V1)) - ddomain = virDomainMigrateVersion1(domain, dconn, flags, dname, uri, bandwidth); - else if (VIR_DRV_SUPPORTS_FEATURE(domain->conn->driver, domain->conn, - VIR_DRV_FEATURE_MIGRATION_V2) && - VIR_DRV_SUPPORTS_FEATURE(dconn->driver, dconn, - VIR_DRV_FEATURE_MIGRATION_V2)) + VIR_DRV_FEATURE_MIGRATION_V3)) { + VIR_DEBUG0("Using migration protocol 3"); + ddomain = virDomainMigrateVersion3(domain, dconn, flags, dname, uri, bandwidth); + } else if (VIR_DRV_SUPPORTS_FEATURE(domain->conn->driver, domain->conn, + VIR_DRV_FEATURE_MIGRATION_V2) && + VIR_DRV_SUPPORTS_FEATURE(dconn->driver, dconn, + VIR_DRV_FEATURE_MIGRATION_V2)) { + VIR_DEBUG0("Using migration protocol 2"); ddomain = virDomainMigrateVersion2(domain, dconn, flags, dname, uri, bandwidth); - else { + } else if (VIR_DRV_SUPPORTS_FEATURE(domain->conn->driver, domain->conn, + VIR_DRV_FEATURE_MIGRATION_V1) && + VIR_DRV_SUPPORTS_FEATURE(dconn->driver, dconn, + VIR_DRV_FEATURE_MIGRATION_V1)) { + VIR_DEBUG0("Using migration protocol 1"); + ddomain = virDomainMigrateVersion1(domain, dconn, flags, dname, uri, bandwidth); + } else { /* This driver does not support any migration method */ virLibConnError(VIR_ERR_NO_SUPPORT, __FUNCTION__); goto error; @@ -3998,6 +4213,330 @@ error: return -1; } +/* + * Not for public use. This function is part of the internal + * implementation of migration in the remote case. 
+ */ +char * +virDomainMigrateBegin3(virDomainPtr domain, + char **cookieout, + int *cookieoutlen, + unsigned long flags, + const char *dname, + unsigned long bandwidth) +{ + virConnectPtr conn; + + VIR_DOMAIN_DEBUG(domain, "cookieout=%p, cookieoutlen=%p, " + "flags=%lu, dname=%s, bandwidth=%lu", + cookieout, cookieoutlen, flags, + NULLSTR(dname), bandwidth); + + virResetLastError(); + + if (!VIR_IS_CONNECTED_DOMAIN (domain)) { + virLibDomainError(VIR_ERR_INVALID_DOMAIN, __FUNCTION__); + virDispatchError(NULL); + return NULL; + } + conn = domain->conn; + + if (domain->conn->flags & VIR_CONNECT_RO) { + virLibDomainError(VIR_ERR_OPERATION_DENIED, __FUNCTION__); + goto error; + } + + if (conn->driver->domainMigrateBegin3) { + char *xml; + xml = conn->driver->domainMigrateBegin3(domain, + cookieout, cookieoutlen, + flags, dname, bandwidth); + VIR_DEBUG("xml %s", xml); + if (!xml) + goto error; + return xml; + } + + virLibDomainError(VIR_ERR_NO_SUPPORT, __FUNCTION__); + +error: + virDispatchError(domain->conn); + return NULL; +} + + +/* + * Not for public use. This function is part of the internal + * implementation of migration in the remote case. + */ +int +virDomainMigratePrepare3(virConnectPtr dconn, + const char *cookiein, + int cookieinlen, + char **cookieout, + int *cookieoutlen, + const char *uri_in, + char **uri_out, + unsigned long flags, + const char *dname, + unsigned long bandwidth, + const char *dom_xml) +{ + VIR_DEBUG("dconn=%p, cookiein=%p, cookieinlen=%d, cookieout=%p, cookieoutlen=%p," + "uri_in=%s, uri_out=%p, flags=%lu, dname=%s, bandwidth=%lu, dom_xml=%s", + dconn, cookiein, cookieinlen, cookieout, cookieoutlen, uri_in, uri_out, + flags, NULLSTR(dname), bandwidth, dom_xml); + + virResetLastError(); + + if (!VIR_IS_CONNECT (dconn)) { + virLibConnError(VIR_ERR_INVALID_CONN, __FUNCTION__); + virDispatchError(NULL); + return -1; + } + + if (dconn->flags & VIR_CONNECT_RO) { + virLibConnError(VIR_ERR_OPERATION_DENIED, __FUNCTION__); + goto error; + } + + if (dconn->driver->domainMigratePrepare3) { + int ret; + ret = dconn->driver->domainMigratePrepare3(dconn, + cookiein, cookieinlen, + cookieout, cookieoutlen, + uri_in, uri_out, + flags, dname, bandwidth, + dom_xml); + if (ret < 0) + goto error; + return ret; + } + + virLibConnError(VIR_ERR_NO_SUPPORT, __FUNCTION__); + +error: + virDispatchError(dconn); + return -1; +} + +/* + * Not for public use. This function is part of the internal + * implementation of migration in the remote case. 
+ */ +int +virDomainMigratePrepareTunnel3(virConnectPtr conn, + virStreamPtr st, + const char *cookiein, + int cookieinlen, + char **cookieout, + int *cookieoutlen, + unsigned long flags, + const char *dname, + unsigned long bandwidth, + const char *dom_xml) + +{ + VIR_DEBUG("conn=%p, stream=%p, cookiein=%p, cookieinlen=%d, cookieout=%p," + " cookieoutlen=%p, flags=%lu, dname=%s, bandwidth=%lu, dom_xml=%s", + conn, st, cookiein, cookieinlen, cookieout, cookieoutlen, flags, + NULLSTR(dname), bandwidth, dom_xml); + + virResetLastError(); + + if (!VIR_IS_CONNECT(conn)) { + virLibConnError(VIR_ERR_INVALID_CONN, __FUNCTION__); + virDispatchError(NULL); + return -1; + } + + if (conn->flags & VIR_CONNECT_RO) { + virLibConnError(VIR_ERR_OPERATION_DENIED, __FUNCTION__); + goto error; + } + + if (conn != st->conn) { + virLibConnError(VIR_ERR_INVALID_ARG, __FUNCTION__); + goto error; + } + + if (conn->driver->domainMigratePrepareTunnel3) { + int rv = conn->driver->domainMigratePrepareTunnel3(conn, st, + cookiein, cookieinlen, + cookieout, cookieoutlen, + flags, dname, + bandwidth, dom_xml); + if (rv < 0) + goto error; + return rv; + } + + virLibConnError(VIR_ERR_NO_SUPPORT, __FUNCTION__); + +error: + virDispatchError(conn); + return -1; +} + + +/* + * Not for public use. This function is part of the internal + * implementation of migration in the remote case. + */ +int +virDomainMigratePerform3(virDomainPtr domain, + const char *cookiein, + int cookieinlen, + char **cookieout, + int *cookieoutlen, + const char *uri, + unsigned long flags, + const char *dname, + unsigned long bandwidth) +{ + virConnectPtr conn; + + VIR_DOMAIN_DEBUG(domain, "cookiein=%p, cookieinlen=%d, cookieout=%p, cookieoutlen=%p," + "uri=%s, flags=%lu, dname=%s, bandwidth=%lu", + cookiein, cookieinlen, cookieout, cookieoutlen, + uri, flags, NULLSTR(dname), bandwidth); + + virResetLastError(); + + if (!VIR_IS_CONNECTED_DOMAIN (domain)) { + virLibDomainError(VIR_ERR_INVALID_DOMAIN, __FUNCTION__); + virDispatchError(NULL); + return -1; + } + conn = domain->conn; + + if (domain->conn->flags & VIR_CONNECT_RO) { + virLibDomainError(VIR_ERR_OPERATION_DENIED, __FUNCTION__); + goto error; + } + + if (conn->driver->domainMigratePerform3) { + int ret; + ret = conn->driver->domainMigratePerform3(domain, + cookiein, cookieinlen, + cookieout, cookieoutlen, + uri, + flags, dname, bandwidth); + if (ret < 0) + goto error; + return ret; + } + + virLibDomainError(VIR_ERR_NO_SUPPORT, __FUNCTION__); + +error: + virDispatchError(domain->conn); + return -1; +} + + +/* + * Not for public use. This function is part of the internal + * implementation of migration in the remote case. 
+ */ +int +virDomainMigrateFinish3(virConnectPtr dconn, + const char *dname, + const char *cookiein, + int cookieinlen, + char **cookieout, + int *cookieoutlen, + const char *uri, + unsigned long flags, + int cancelled, + virDomainPtr *newdom) +{ + VIR_DEBUG("dconn=%p, dname=%s, cookiein=%p, cookieinlen=%d, cookieout=%p," + "cookieoutlen=%p, uri=%s, flags=%lu, cancelled=%d newdom=%p", + dconn, NULLSTR(dname), cookiein, cookieinlen, cookieout, + cookieoutlen, uri, flags, cancelled, newdom); + + virResetLastError(); + + if (!VIR_IS_CONNECT (dconn)) { + virLibConnError(VIR_ERR_INVALID_CONN, __FUNCTION__); + virDispatchError(NULL); + return -1; + } + + if (dconn->flags & VIR_CONNECT_RO) { + virLibConnError(VIR_ERR_OPERATION_DENIED, __FUNCTION__); + goto error; + } + + if (dconn->driver->domainMigrateFinish3) { + int ret; + ret = dconn->driver->domainMigrateFinish3(dconn, dname, + cookiein, cookieinlen, + cookieout, cookieoutlen, + uri, flags, + cancelled, + newdom); + if (ret < 0) + goto error; + return ret; + } + + virLibConnError(VIR_ERR_NO_SUPPORT, __FUNCTION__); + +error: + virDispatchError(dconn); + return -1; +} + + +/* + * Not for public use. This function is part of the internal + * implementation of migration in the remote case. + */ +int +virDomainMigrateConfirm3(virDomainPtr domain, + const char *cookiein, + int cookieinlen, + unsigned long flags, + int cancelled) +{ + virConnectPtr conn; + + VIR_DOMAIN_DEBUG(domain, "cookiein=%p, cookieinlen=%d, flags=%lu, cancelled=%d", + cookiein, cookieinlen, flags, cancelled); + + virResetLastError(); + + if (!VIR_IS_CONNECTED_DOMAIN (domain)) { + virLibDomainError(VIR_ERR_INVALID_DOMAIN, __FUNCTION__); + virDispatchError(NULL); + return -1; + } + conn = domain->conn; + + if (domain->conn->flags & VIR_CONNECT_RO) { + virLibDomainError(VIR_ERR_OPERATION_DENIED, __FUNCTION__); + goto error; + } + + if (conn->driver->domainMigrateConfirm3) { + int ret; + ret = conn->driver->domainMigrateConfirm3(domain, + cookiein, cookieinlen, + flags, cancelled); + if (ret < 0) + goto error; + return ret; + } + + virLibDomainError(VIR_ERR_NO_SUPPORT, __FUNCTION__); + +error: + virDispatchError(domain->conn); + return -1; +} + /** * virNodeGetInfo: diff --git a/src/libvirt_internal.h b/src/libvirt_internal.h index 1c4fa4f..81d0c56 100644 --- a/src/libvirt_internal.h +++ b/src/libvirt_internal.h @@ -66,6 +66,13 @@ enum { * perform step is used. */ VIR_DRV_FEATURE_MIGRATION_DIRECT = 5, + + /* + * Driver supports V3-style virDomainMigrate, ie domainMigrateBegin3/ + * domainMigratePrepare3/domainMigratePerform3/domainMigrateFinish3/ + * domainMigrateConfirm3.
+ */ + VIR_DRV_FEATURE_MIGRATION_V3 = 6, }; @@ -115,4 +122,63 @@ int virDomainMigratePrepareTunnel(virConnectPtr dconn, unsigned long resource, const char *dom_xml); + +char *virDomainMigrateBegin3(virDomainPtr domain, + char **cookieout, + int *cookieoutlen, + unsigned long flags, + const char *dname, + unsigned long resource); + +int virDomainMigratePrepare3(virConnectPtr dconn, + const char *cookiein, + int cookieinlen, + char **cookieout, + int *cookieoutlen, + const char *uri_in, + char **uri_out, + unsigned long flags, + const char *dname, + unsigned long resource, + const char *dom_xml); + +int virDomainMigratePrepareTunnel3(virConnectPtr dconn, + virStreamPtr st, + const char *cookiein, + int cookieinlen, + char **cookieout, + int *cookieoutlen, + unsigned long flags, + const char *dname, + unsigned long resource, + const char *dom_xml); + + +int virDomainMigratePerform3(virDomainPtr dom, + const char *cookiein, + int cookieinlen, + char **cookieout, + int *cookieoutlen, + const char *uri, + unsigned long flags, + const char *dname, + unsigned long resource); + +int virDomainMigrateFinish3(virConnectPtr dconn, + const char *dname, + const char *cookiein, + int cookieinlen, + char **cookieout, + int *cookieoutlen, + const char *uri, + unsigned long flags, + int cancelled, /* Kill the dst VM */ + virDomainPtr *newdom); + +int virDomainMigrateConfirm3(virDomainPtr domain, + const char *cookiein, + int cookieinlen, + unsigned long flags, + int restart); /* Restart the src VM */ + #endif diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms index b9e3efe..d1e2f4c 100644 --- a/src/libvirt_private.syms +++ b/src/libvirt_private.syms @@ -523,6 +523,12 @@ virDomainMigratePerform; virDomainMigratePrepare2; virDomainMigratePrepare; virDomainMigratePrepareTunnel; +virDomainMigrateBegin3; +virDomainMigratePrepare3; +virDomainMigratePrepareTunnel3; +virDomainMigratePerform3; +virDomainMigrateFinish3; +virDomainMigrateConfirm3; virDrvSupportsFeature; virRegisterDeviceMonitor; virRegisterDriver; diff --git a/src/lxc/lxc_driver.c b/src/lxc/lxc_driver.c index 0f78579..3997390 100644 --- a/src/lxc/lxc_driver.c +++ b/src/lxc/lxc_driver.c @@ -2930,6 +2930,12 @@ static virDriver lxcDriver = { lxcDomainSetMemoryParameters, /* domainSetMemoryParameters */ lxcDomainGetMemoryParameters, /* domainGetMemoryParameters */ lxcDomainOpenConsole, /* domainOpenConsole */ + NULL, /* domainMigrateBegin3 */ + NULL, /* domainMigratePrepare3 */ + NULL, /* domainMigratePrepareTunnel3 */ + NULL, /* domainMigratePerform3 */ + NULL, /* domainMigrateFinish3 */ + NULL, /* domainMigrateConfirm3 */ }; static virStateDriver lxcStateDriver = { diff --git a/src/opennebula/one_driver.c b/src/opennebula/one_driver.c index 75d7b9a..9c06a99 100644 --- a/src/opennebula/one_driver.c +++ b/src/opennebula/one_driver.c @@ -829,6 +829,12 @@ static virDriver oneDriver = { NULL, /* domainSetMemoryParameters */ NULL, /* domainGetMemoryParameters */ NULL, /* domainOpenConsole */ + NULL, /* domainMigrateBegin3 */ + NULL, /* domainMigratePrepare3 */ + NULL, /* domainMigratePrepareTunnel3 */ + NULL, /* domainMigratePerform3 */ + NULL, /* domainMigrateFinish3 */ + NULL, /* domainMigrateConfirm3 */ }; static virStateDriver oneStateDriver = { diff --git a/src/openvz/openvz_driver.c b/src/openvz/openvz_driver.c index 00d378a..68d7ac0 100644 --- a/src/openvz/openvz_driver.c +++ b/src/openvz/openvz_driver.c @@ -1669,6 +1669,12 @@ static virDriver openvzDriver = { NULL, /* domainSetMemoryParameters */ NULL, /* domainGetMemoryParameters */ 
NULL, /* domainOpenConsole */ + NULL, /* domainMigrateBegin3 */ + NULL, /* domainMigratePrepare3 */ + NULL, /* domainMigratePrepareTunnel3 */ + NULL, /* domainMigratePerform3 */ + NULL, /* domainMigrateFinish3 */ + NULL, /* domainMigrateConfirm3 */ }; int openvzRegister(void) { diff --git a/src/phyp/phyp_driver.c b/src/phyp/phyp_driver.c index d954f2a..4a108f0 100644 --- a/src/phyp/phyp_driver.c +++ b/src/phyp/phyp_driver.c @@ -4051,6 +4051,12 @@ static virDriver phypDriver = { NULL, /* domainSetMemoryParameters */ NULL, /* domainGetMemoryParameters */ NULL, /* domainOpenConsole */ + NULL, /* domainMigrateBegin3 */ + NULL, /* domainMigratePrepare3 */ + NULL, /* domainMigratePrepareTunnel3 */ + NULL, /* domainMigratePerform3 */ + NULL, /* domainMigrateFinish3 */ + NULL, /* domainMigrateConfirm3 */ }; static virStorageDriver phypStorageDriver = { diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c index 21e88f8..82f735a 100644 --- a/src/qemu/qemu_driver.c +++ b/src/qemu/qemu_driver.c @@ -6854,6 +6854,12 @@ static virDriver qemuDriver = { qemuDomainSetMemoryParameters, /* domainSetMemoryParameters */ qemuDomainGetMemoryParameters, /* domainGetMemoryParameters */ qemuDomainOpenConsole, /* domainOpenConsole */ + NULL, /* domainMigrateBegin3 */ + NULL, /* domainMigratePrepare3 */ + NULL, /* domainMigratePrepareTunnel3 */ + NULL, /* domainMigratePerform3 */ + NULL, /* domainMigrateFinish3 */ + NULL, /* domainMigrateConfirm3 */ }; diff --git a/src/remote/remote_driver.c b/src/remote/remote_driver.c index 4ca0d3b..464222c 100644 --- a/src/remote/remote_driver.c +++ b/src/remote/remote_driver.c @@ -10946,6 +10946,12 @@ static virDriver remote_driver = { remoteDomainSetMemoryParameters, /* domainSetMemoryParameters */ remoteDomainGetMemoryParameters, /* domainGetMemoryParameters */ remoteDomainOpenConsole, /* domainOpenConsole */ + NULL, /* domainMigrateBegin3 */ + NULL, /* domainMigratePrepare3 */ + NULL, /* domainMigratePrepareTunnel3 */ + NULL, /* domainMigratePerform3 */ + NULL, /* domainMigrateFinish3 */ + NULL, /* domainMigrateConfirm3 */ }; static virNetworkDriver network_driver = { diff --git a/src/test/test_driver.c b/src/test/test_driver.c index 1937da0..55a2c8a 100644 --- a/src/test/test_driver.c +++ b/src/test/test_driver.c @@ -5443,6 +5443,12 @@ static virDriver testDriver = { NULL, /* domainSetMemoryParameters */ NULL, /* domainGetMemoryParameters */ NULL, /* domainOpenConsole */ + NULL, /* domainMigrateBegin3 */ + NULL, /* domainMigratePrepare3 */ + NULL, /* domainMigratePrepareTunnel3 */ + NULL, /* domainMigratePerform3 */ + NULL, /* domainMigrateFinish3 */ + NULL, /* domainMigrateConfirm3 */ }; static virNetworkDriver testNetworkDriver = { diff --git a/src/uml/uml_driver.c b/src/uml/uml_driver.c index 2af8002..531523d 100644 --- a/src/uml/uml_driver.c +++ b/src/uml/uml_driver.c @@ -2245,6 +2245,12 @@ static virDriver umlDriver = { NULL, /* domainSetMemoryParamters */ NULL, /* domainGetMemoryParamters */ umlDomainOpenConsole, /* domainOpenConsole */ + NULL, /* domainMigrateBegin3 */ + NULL, /* domainMigratePrepare3 */ + NULL, /* domainMigratePrepareTunnel3 */ + NULL, /* domainMigratePerform3 */ + NULL, /* domainMigrateFinish3 */ + NULL, /* domainMigrateConfirm3 */ }; static int diff --git a/src/vbox/vbox_tmpl.c b/src/vbox/vbox_tmpl.c index cf3cbc6..b910932 100644 --- a/src/vbox/vbox_tmpl.c +++ b/src/vbox/vbox_tmpl.c @@ -8644,6 +8644,12 @@ virDriver NAME(Driver) = { NULL, /* domainSetMemoryParameters */ NULL, /* domainGetMemoryParameters */ NULL, /* domainOpenConsole */ + 
NULL, /* domainMigrateBegin3 */ + NULL, /* domainMigratePrepare3 */ + NULL, /* domainMigratePrepareTunnel3 */ + NULL, /* domainMigratePerform3 */ + NULL, /* domainMigrateFinish3 */ + NULL, /* domainMigrateConfirm3 */ }; virNetworkDriver NAME(NetworkDriver) = { diff --git a/src/vmware/vmware_driver.c b/src/vmware/vmware_driver.c index 22b29d1..1b5f537 100644 --- a/src/vmware/vmware_driver.c +++ b/src/vmware/vmware_driver.c @@ -1005,6 +1005,12 @@ static virDriver vmwareDriver = { NULL, /* domainSetMemoryParameters */ NULL, /* domainGetMemoryParameters */ NULL, /* domainOpenConsole */ + NULL, /* domainMigrateBegin3 */ + NULL, /* domainMigratePrepare3 */ + NULL, /* domainMigratePrepareTunnel3 */ + NULL, /* domainMigratePerform3 */ + NULL, /* domainMigrateFinish3 */ + NULL, /* domainMigrateConfirm3 */ }; int diff --git a/src/xen/xen_driver.c b/src/xen/xen_driver.c index b14c8db..d752a91 100644 --- a/src/xen/xen_driver.c +++ b/src/xen/xen_driver.c @@ -2101,6 +2101,12 @@ static virDriver xenUnifiedDriver = { NULL, /* domainSetMemoryParameters */ NULL, /* domainGetMemoryParameters */ xenUnifiedDomainOpenConsole, /* domainOpenConsole */ + NULL, /* domainMigrateBegin3 */ + NULL, /* domainMigratePrepare3 */ + NULL, /* domainMigratePrepareTunnel3 */ + NULL, /* domainMigratePerform3 */ + NULL, /* domainMigrateFinish3 */ + NULL, /* domainMigrateConfirm3 */ }; /** diff --git a/src/xenapi/xenapi_driver.c b/src/xenapi/xenapi_driver.c index 7851e93..4458d3f 100644 --- a/src/xenapi/xenapi_driver.c +++ b/src/xenapi/xenapi_driver.c @@ -1881,6 +1881,12 @@ static virDriver xenapiDriver = { NULL, /* domainSetMemoryParameters */ NULL, /* domainGetMemoryParameters */ NULL, /* domainOpenConsole */ + NULL, /* domainMigrateBegin3 */ + NULL, /* domainMigratePrepare3 */ + NULL, /* domainMigratePrepareTunnel3 */ + NULL, /* domainMigratePerform3 */ + NULL, /* domainMigrateFinish3 */ + NULL, /* domainMigrateConfirm3 */ }; /** -- 1.7.4
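As context for the VIR_DRV_FEATURE_MIGRATION_V3 flag added above: the generic virDomainMigrate entry point is expected to probe the capability on both connections before choosing the v3 path. A minimal sketch, assuming the existing VIR_DRV_SUPPORTS_FEATURE helper that already guards the v1/v2 selection (error handling elided):

    /* Sketch: pick the v3 protocol only when both ends advertise it.
     * virDomainMigrateVersion3() is the internal helper this series adds. */
    if (VIR_DRV_SUPPORTS_FEATURE(domain->conn->driver, domain->conn,
                                 VIR_DRV_FEATURE_MIGRATION_V3) &&
        VIR_DRV_SUPPORTS_FEATURE(dconn->driver, dconn,
                                 VIR_DRV_FEATURE_MIGRATION_V3)) {
        ddomain = virDomainMigrateVersion3(domain, dconn, flags,
                                           dname, uri, bandwidth);
    } else {
        /* fall back to the v2/v1 code paths */
    }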

On 02/09/2011 09:58 AM, Daniel P. Berrange wrote:
This patch attempts to introduce a version 3 that uses the improved 5 step sequence
* Src: Begin - Generate XML to pass to dst - Generate optional cookie to pass to dst
* Dst: Prepare - Get ready to accept incoming VM - Generate optional cookie to pass to src
* Src: Perform - Start migration and wait for send completion - Generate optional cookie to pass to dst
* Dst: Finish - Wait for recv completion and check status - Kill off VM if failed, resume if success - Generate optional cookie to pass to src
* Src: Confirm - Kill off VM if success, resume if failed
The API is designed to allow both input and output cookies in all methods where applicable. This lets us pass around arbitrary extra driver specific data between src & dst during migration. Combined with the extra 'Begin' method this lets us pass lease information from source to dst at the start of migration
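Stitched together, the internal driver entry points declared in this series run in roughly this order. A minimal sketch of the client-side orchestration, assuming success at every step; the real virDomainMigrateVersion3 also threads the cookies between the steps (NULL/0 below) and handles the failure paths in more detail:

    static virDomainPtr
    doMigrateV3Sketch(virDomainPtr dom, virConnectPtr dconn,
                      unsigned long flags, const char *dname,
                      const char *uri, unsigned long bandwidth)
    {
        virDomainPtr ddomain = NULL;
        char *uri_out = NULL;
        int cancelled = 0;
        char *xml;

        /* Src: Begin - obtain the domain XML (and optional cookie) for dst */
        xml = dom->conn->driver->domainMigrateBegin3(dom, NULL, NULL,
                                                     flags, dname, bandwidth);
        if (!xml)
            return NULL;

        /* Dst: Prepare - get ready to accept the incoming VM */
        if (dconn->driver->domainMigratePrepare3(dconn, NULL, 0, NULL, NULL,
                                                 uri, &uri_out, flags,
                                                 dname, bandwidth, xml) < 0)
            goto cleanup;

        /* Src: Perform - stream the VM; on failure the dst must be killed */
        if (dom->conn->driver->domainMigratePerform3(dom, NULL, 0, NULL, NULL,
                                                     uri_out ? uri_out : uri,
                                                     flags, dname,
                                                     bandwidth) < 0)
            cancelled = 1;

        /* Dst: Finish - resume the VM, or tear it down if cancelled */
        dconn->driver->domainMigrateFinish3(dconn, dname, NULL, 0, NULL, NULL,
                                            uri_out ? uri_out : uri, flags,
                                            cancelled, &ddomain);

        /* Src: Confirm - kill the src VM on success, resume it on failure */
        dom->conn->driver->domainMigrateConfirm3(dom, NULL, 0,
                                                 flags, cancelled);

    cleanup:
        VIR_FREE(xml);
        VIR_FREE(uri_out);
        return ddomain;
    }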
This sounds like a post-0.8.8 change, but a good one.
+static virDomainPtr +virDomainMigrateVersion3(virDomainPtr domain, + virConnectPtr dconn, + unsigned long flags, + const char *dname, + const char *uri, + unsigned long bandwidth) +{ + + if (uri == NULL && uri_out == NULL) { + virLibConnError(VIR_ERR_INTERNAL_ERROR, + _("domainMigratePrepare2 did not set uri"));
s/2/3/
+ virDispatchError(domain->conn); + goto done; + } + if (uri_out) + uri = uri_out; /* Did domainMigratePrepare2 change URI? */
s/2/3/
+ /* + * The status code from the source is passed to the destination. + * The dest can cleanup in the source indicated it failed to
s/in/if/
+ if (conn->driver->domainMigrateBegin3) { + char *xml; + xml = conn->driver->domainMigrateBegin3(domain, + cookieout, cookieoutlen, + flags, dname, bandwidth); + VIR_DEBUG("xml %s", xml);
s/ xml/ NULLSTR(xml)/
+int virDomainMigrateFinish3(virConnectPtr dconn, + const char *dname, + const char *cookiein, + int cookieinlen, + char **cookieout, + int *cookieoutlen, + const char *uri, + unsigned long flags, + int cancelled, /* Kill the dst VM */ + virDomainPtr *newdom); + +int virDomainMigrateConfirm3(virDomainPtr domain, + const char *cookiein, + int cookieinlen, + unsigned long flags, + int restart); /* Restart the src VM */
Since cancelled and restart are basically bool, should they be incorporated into flags rather than an extra argument? But I'm fine with leaving them separate, especially since that leaves flags for just those bits requested by the user. ACK with those nits fixed. -- Eric Blake eblake@redhat.com +1-801-349-2682 Libvirt virtualization library http://libvirt.org
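For what it's worth, the folding Eric alludes to would look something like this (a purely hypothetical bit, invented here for illustration; it was not what the series proposes):

    /* Hypothetical internal-only flag replacing the separate
     * 'cancelled' / 'restart' int parameters: */
    #define VIR_MIGRATE_INTERNAL_ABORTED (1UL << 16)

    if (flags & VIR_MIGRATE_INTERNAL_ABORTED) {
        /* Finish3 would kill the dst VM; Confirm3 would restart the src */
    }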

On 02/09/2011 09:58 AM, Daniel P. Berrange wrote:
This patch attempts to introduce a version 3 that uses the improved 5 step sequence
* Src: Begin - Generate XML to pass to dst - Generate optional cookie to pass to dst
* Dst: Prepare - Get ready to accept incoming VM - Generate optional cookie to pass to src
* Src: Perform - Start migration and wait for send completion - Generate optional cookie to pass to dst
* Dst: Finish - Wait for recv completion and check status - Kill off VM if failed, resume if success - Generate optional cookie to pass to src
* Src: Confirm - Kill off VM if success, resume if failed
I've been thinking about this a bit more, and have a question. What happens when the source side decides to abort the migration? For example, if libvirtd on the source gets a SIGHUP or SIGINT, it would be nice to have the cleanup code abort any in-flight migrations so that when libvirtd is restarted, the guest is still operational on the source, and the guest does not have to wait for a full TCP timeout cycle to realize that the source is not going to complete the migration. Does this call for additional internal points in the RPC implementation of v3 migration? -- Eric Blake eblake@redhat.com +1-801-349-2682 Libvirt virtualization library http://libvirt.org

On Tue, Apr 12, 2011 at 05:12:18PM -0600, Eric Blake wrote:
On 02/09/2011 09:58 AM, Daniel P. Berrange wrote:
This patch attempts to introduce a version 3 that uses the improved 5 step sequence
* Src: Begin - Generate XML to pass to dst - Generate optional cookie to pass to dst
* Dst: Prepare - Get ready to accept incoming VM - Generate optional cookie to pass to src
* Src: Perform - Start migration and wait for send completion - Generate optional cookie to pass to dst
* Dst: Finish - Wait for recv completion and check status - Kill off VM if failed, resume if success - Generate optional cookie to pass to src
* Src: Confirm - Kill off VM if success, resume if failed
I've been thinking about this a bit more, and have a question. What happens when the source side decides to abort the migration? For example, if libvirtd on the source gets a SIGHUP or SIGINT, it would be nice to have the cleanup code abort any in-flight migrations so that when libvirtd is restarted, the guest is still operational on the source, and the guest does not have to wait for a full TCP timeout cycle to realize that the source is not going to complete the migration.
Does this call for additional internal points in the RPC implementation of v3 migration?
The source can already abort migration, even in the v2 protocol, using the virDomainAbortJob() API (or virsh domjobabort). This issues a 'migrate_cancel' monitor command to QEMU, which in turn causes the 'perform' step to return failure, which is passed to the 'finish' step, which tears down the destination VM. Regards, Daniel -- |: http://berrange.com -o- http://www.flickr.com/photos/dberrange/ :| |: http://libvirt.org -o- http://virt-manager.org :| |: http://autobuild.org -o- http://search.cpan.org/~danberr/ :| |: http://entangle-photo.org -o- http://live.gnome.org/gtk-vnc :|
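A minimal example of triggering that abort from a second thread or process while the migration call is blocked (sketch: the guest name is hypothetical and NULL checks are elided for brevity):

    #include <stdio.h>
    #include <libvirt/libvirt.h>

    int main(void)
    {
        /* Abort the in-flight migration job of domain "guest" */
        virConnectPtr conn = virConnectOpen("qemu:///system");
        virDomainPtr dom = virDomainLookupByName(conn, "guest");

        if (virDomainAbortJob(dom) < 0)
            fprintf(stderr, "failed to abort the migration job\n");

        virDomainFree(dom);
        virConnectClose(conn);
        return 0;
    }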

Daniel, I looked at the patch-set you sent out on the 2/9/11

[libvirt] [PATCH 0/6] Introduce a new migration protocol to QEMU driver http://www.mail-archive.com/libvir-list@redhat.com/msg33223.html

What is the status of this new migration protocol? Is there any pending issue blocking its integration?

I would like to propose an RFC enhancement to the migration algorithm. Here is a quick summary of the proposal/idea.

- finer control on migration result
- possibility of specifying what features cannot fail their initialization on the dst host during migration. Migration should not succeed if any of them fails.
- optional: each one of those features should be able to provide a deinit function to cleanup resources on the dst host if migration fails.

This functionality would come in useful for the (NIC) set port profile feature VDP (802.1Qbg/1Qbh), but what I propose is a generic config option / API that can be used by any feature.

And now the details.

----------------------------------------------
enhancement: finer control on migration result
----------------------------------------------

There are different reasons why a VM may need (or be forced) to migrate. You can classify the types of the migrations also based on different semantics. For simplicity I'll classify them into two categories, based on how important it is for the VM to migrate as fast as possible:

(1) It IS important

In this case, whether the VM will not be able to (temporarily) make use of certain resources (for example the network) on the dst host, is not that important, because the completion of the migration is considered higher priority. A possible scenario could be a server that must migrate ASAP because of a disaster/emergency.

(2) It IS NOT important

I can think of a VM whose applications/servers need a network connection in order to work properly. Losing such network connectivity as a consequence of a migration would not be acceptable (or highly undesirable).

Given the case (2) above, I have a comment about the Finish step, with regards to the port profile (VDP) codepath. The call to qemuMigrationVPAssociatePortProfile in qemuMigrationFinish can fail, but its result (success or failure) does not influence the result of the migration Finish step (it was already like this in migration V2). It is therefore possible for a VM to lose its network connectivity after a (successful) migration.

Given that you are in the process of adding a new migration version, I was wondering if we could add a mechanism for the src host to select behavior/policy (1) or (2). It would be one more option in the libvirt XML domain config file. The above example refers to one specific feature (network connectivity based on the use of a port profile, ie VDP), but in theory this feature could be made generic such that the same policy (1)/(2) could be used for any feature that the admin may not be willing to lose as a result of a (successful) migration. The management application on the src host (virt-manager, RHEV-M, etc) can for example, in case (2), retry the migration toward the same host or try with another destination host.
The port profile configuration happens in two stages:

- there is a 1st call during PREPARE:

    openMacvtapTap
      |
      +-> (a) vpAssociatePortProfileId (PREASSOCIATE)

- and a second call during FINISH:

    qemuMigrationVPAssociatePortProfile
      |
      +-> (b) vpAssociatePortProfileId (ASSOCIATE)

Right now, both in migration V2 and V3:

- (a) cannot fail (ie, if it fails, migration fails too),
- (b) can fail (ie, if it fails, migration does not fail and the VM loses connectivity).

With the introduction of the policy (1)/(2) mentioned above, this would be the new behavior:

- Case (1) above, ie fast migration --> (a) and (b) are non blocking --> Their result does not affect the migration result.
- Case (2) above, ie possibly slower migration --> (a) and (b) are blocking --> Their result does affect the migration result.

It may not be possible to handle all possible scenarios (there can be cases where dependencies between features would make it difficult to choose between (1) and (2) on a per-feature basis), but that should not be a reason not to provide this policy for a subset of features (network connectivity being one). The new 5-step migration version V3 makes it easier (compared to V2) to introduce this kind of policy. I guess we could use the cookies to carry the necessary information/acks.

There are still numerous things that can go wrong. For example, if migration aborts after (a), or even after (b), there should be a way for Libvirt to undo the set port profile. Actually, there should be a way for any feature (not just VDP) that successfully initialized on the dst host to (at least try to) cleanup. For this reason, it could make sense, for each one of the features using policy (2) (ie, cannot fail during migration), to also provide a deinit routine (as part of the API that marks the feature as "can't fail"). Such routine/s can be called by libvirt to revert/rollback the initializations that successfully completed during the migration attempt. This could be done probably at the end of the Finish3 phase. If something goes wrong (including a signal that aborts migration just before Confirm3 completes), we may have a small problem. I do not know how we can handle that case without another cleanup RPC call.

If you agree that this kind of functionality/policy described above has some value, I can help with its implementation and testing.

BTW, would the new functionality being discussed in this 3D

"RFC: virInterface change transaction API" http://www.redhat.com/archives/libvir-list/2011-April/msg00499.html

be able to provide the same configuration "atomicity" (ie, rollback in case of migration failure)? My understanding is that:

- Such new framework would apply to (host) network config only. Even though it may cover the VDP (port profile) use case I mentioned above, it would not apply to other features that may need some kind of rollback after a migration failure.
- Would it be possible to design that new transaction API (or a sister API) such that it would come in useful for the guest migration case too? If so, the deinit/rollback routine I proposed may be defined as a simple wrapper around the transaction API and let other (non networking) features use their own deinit functions.

Thanks

/Chris
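One possible shape for the "must not fail" feature registration Christian describes. This is purely hypothetical: none of these names exist in libvirt, and the struct only illustrates the init/rollback pairing:

    /* Hypothetical sketch of the proposed per-feature migration policy.
     * 'mandatory' corresponds to policy (2): a failed init aborts the
     * migration, and rollback() undoes completed work on the dst host. */
    typedef struct _qemuMigrationFeature qemuMigrationFeature;
    struct _qemuMigrationFeature {
        const char *name;                     /* e.g. "vdp-port-profile" */
        int (*init)(virDomainObjPtr vm);      /* run on dst in Prepare3/Finish3 */
        void (*rollback)(virDomainObjPtr vm); /* undo on dst if migration fails */
        bool mandatory;                       /* policy (2): failure aborts */
    };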

On 04/20/2011 05:28 PM, Christian Benvenuti (benve) wrote:
Daniel, I looked at the patch-set you sent out on the 2/9/11
[libvirt] [PATCH 0/6] Introduce a new migration protocol to QEMU driver http://www.mail-archive.com/libvir-list@redhat.com/msg33223.html
What is the status of this new migration protocol? Is there any pending issue blocking its integration?
I would like to propose an RFC enhancement to the migration algorithm.
Here is a quick summary of the proposal/idea.
- finer control on migration result
- possibility of specifying what features cannot fail their initialization on the dst host during migration. Migration should not succeed if any of them fails. - optional: each one of those features should be able to provide a deinit function to cleanup resources on the dst host if migration fails.
This functionality would come in useful for the (NIC) set port profile feature VDP (802.1Qbg/1Qbh), but what I propose is a generic config option / API that can be used by any feature.
And now the details.
---------------------------------------------- enhancement: finer control on migration result ----------------------------------------------
There are different reasons why a VM may need (or be forced) to migrate. You can classify the types of the migrations also based on different semantics. For simplicity I'll classify them into two categories, based on how important it is for the VM to migrate as fast as possible:
(1) It IS important
In this case, whether the VM will not be able to (temporarily) make use of certain resources (for example the network) on the dst host, is not that important, because the completion of the migration is considered higher priority. A possible scenario could be a server that must migrate ASAP because of a disaster/emergency.
(2) It IS NOT important
I can think of a VM whose applications/servers need a network connection in order to work properly. Losing such network connectivity as a consequence of a migration would not be acceptable (or highly undesirable).
Given the case (2) above, I have a comment about the Finish step, with regards to the port profile (VDP) codepath.
The call to
qemuMigrationVPAssociatePortProfile
in qemuMigrationFinish
can fail, but its result (success or failure) does not influence the result of the migration Finish step (it was already like this in migration V2).
I *believe* the underlying problem is Qemu's switch-over. Once Qemu decides that the migration was successful, Qemu on the source side dies and continues running on the destination side. I don't think there are more handshakes foreseen with higher layers that this could be reversed or the switch-over delayed, but correct me if I am wrong...

So now whatever we do, we'd have to associate the port profile before the actual switch-over, if we wanted to do something better than what is there now and have the opportunity to terminate the migration before the switch-over by Qemu happens in case of failure to associate profiles. The problem is to know when the switch-over happens or when the migration goes into the final phase where the source side doesn't run anymore. That would allow us to not associate the ports right at the beginning of the migration but maybe towards the time when for example in live-migration the source is not running anymore *and* also we have the result of the association before Qemu on the source dies for good. I think some additional coordination between libvirt and Qemu would be necessary so that, if higher layer ops fail before the resume on the destination side happens, Qemu can still fall back to the source side. I believe what could happen now is that a VM could be transferred too fast (by the Qemu process) while the association (in libvirt) happens, Qemu on the source side dies, and then we only get the negative result of the association. Maybe the simplest solution would be if Qemu on the source side waited for a command before transferring the last packet so we still have a chance to cancel and Qemu doesn't just 'run away' underneath libvirt's feet ;-).

I assume that 2 associations with the same profile are possible with 802.1Qbg and Qbh. Both are also going through a Pre-associate state now. Are there any side-effects if associating twice on the same switch, like no packets that can be sent on the source side or something like that -- obviously this would be bad if this happened early during live-migration and we'd want to push the association close to the 'final migration phase', which in turn may require more coordination with Qemu (don't know whether the final phase can be determined now -- maybe via polling Qemu's monitor).

Regards, Stefan

On 04/20/2011 05:28 PM, Christian Benvenuti (benve) wrote:
Daniel, I looked at the patch-set you sent out on the 2/9/11
[libvirt] [PATCH 0/6] Introduce a new migration protocol to QEMU driver http://www.mail-archive.com/libvir-list@redhat.com/msg33223.html
What is the status of this new migration protocol? Is there any pending issue blocking its integration?
I would like to propose an RFC enhancement to the migration algorithm.
Here is a quick summary of the proposal/idea.
- finer control on migration result
- possibility of specifying what features cannot fail their initialization on the dst host during migration. Migration should not succeed if any of them fails. - optional: each one of those features should be able to provide a deinit function to cleanup resources on the dst host if migration fails.
This functionality would come in useful for the (NIC) set port profile feature VDP (802.1Qbg/1Qbh), but what I propose is a generic config option / API that can be used by any feature.
And now the details.
---------------------------------------------- enhancement: finer control on migration result ----------------------------------------------
There are different reasons why a VM may need (or be forced) to migrate. You can classify the types of the migrations also based on different semantics. For simplicity I'll classify them into two categories, based on how important it is for the VM to migrate as fast as possible:
(1) It IS important
In this case, whether the VM will not be able to (temporarily) make use of certain resources (for example the network) on the dst host, is not that important, because the completion of the migration is considered higher priority. A possible scenario could be a server that must migrate ASAP because of a disaster/emergency.
(2) It IS NOT important
I can think of a VM whose applications/servers need a network connection in order to work properly. Losing such network connectivity as a consequence of a migration would not be acceptable (or highly undesirable).
Given the case (2) above, I have a comment about the Finish step, with regards to the port profile (VDP) codepath.
The call to
qemuMigrationVPAssociatePortProfile
in qemuMigrationFinish
can fail, but its result (success or failure) does not influence the result of the migration Finish step (it was already like this in migration V2).
I *believe* the underlying problem is Qemu's switch-over. Once Qemu decides that the migration was successful, Qemu on the source side dies and continues running on the destination side. I don't think there are more handshakes foreseen with higher layers that this could be reversed or the switch-over delayed, but correct me if I am wrong...

Actually I think this is not what happens in migration V3. My understanding is this:
- the qemu cmdline built by Libvirt on the dst host during Prepare3 includes the "-S" option (ie no autostart)
- the VM on the dst host does not start running until libvirt calls qemuProcessStartCPUs in the Finish3 step. This fn simply sends the "-cont" cmd to the monitor to start the VM/CPUs.

If I am right, libvirt does have full control on how/when to start the CPU on the dst host; it is not QEMU that does it. The only thing libvirt does not control is when to pause the VM on the src host: QEMU does it during the stage-2 of the live-ram-copy based on the max_downtime config. However I do not think this represents a problem. Can someone confirm my understanding of the algorithm? Stefan, if this is correct, I guess the algorithm allows us to abort the migration at any time based on the success of the port profile configuration, and it would make the implementation of the policies (1)/(2) relatively easy. /Christian

So now whatever we do, we'd have to associate the port profile before the actual switch-over, if we wanted to do something better than what is there now and have the opportunity to terminate the migration before the switch-over by Qemu happens in case of failure to associate profiles. The problem is to know when the switch-over happens or when the migration goes into the final phase where the source side doesn't run anymore. That would allow us to not associate the ports right at the beginning of the migration but maybe towards the time when for example in live-migration the source is not running anymore *and* also we have the result of the association before Qemu on the source dies for good. I think some additional coordination between libvirt and Qemu would be necessary so that, if higher layer ops fail before the resume on the destination side happens, Qemu can still fall back to the source side. I believe what could happen now is that a VM could be transferred too fast (by the Qemu process) while the association (in libvirt) happens, Qemu on the source side dies, and then we only get the negative result of the association. Maybe the simplest solution would be if Qemu on the source side waited for a command before transferring the last packet so we still have a chance to cancel and Qemu doesn't just 'run away' underneath libvirt's feet ;-).

I assume that 2 associations with the same profile are possible with 802.1Qbg and Qbh. Both are also going through a Pre-associate state now. Are there any side-effects if associating twice on the same switch, like no packets that can be sent on the source side or something like that -- obviously this would be bad if this happened early during live-migration and we'd want to push the association close to the 'final migration phase', which in turn may require more coordination with Qemu (don't know whether the final phase can be determined now -- maybe via polling Qemu's monitor).

Regards, Stefan
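For reference, the paused destination startup Christian describes looks schematically like this (an illustrative invocation, not the exact command line libvirt generates; -S and -incoming are the real QEMU options, and 'cont' is the monitor command libvirt issues from Finish3):

    # dst host: QEMU starts paused (-S), waiting for the migration stream
    qemu-kvm -S -incoming tcp:0.0.0.0:49152 ... <guest args> ...

    # later, issued by libvirt via the monitor in the Finish3 step:
    (qemu) cont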

On 04/20/2011 11:38 PM, Christian Benvenuti (benve) wrote:
On 04/20/2011 05:28 PM, Christian Benvenuti (benve) wrote:
Daniel, I looked at the patch-set you sent out on the 2/9/11
[libvirt] [PATCH 0/6] Introduce a new migration protocol to QEMU driver http://www.mail-archive.com/libvir-list@redhat.com/msg33223.html
What is the status of this new migration protocol? Is there any pending issue blocking its integration?
I would like to propose an RFC enhancement to the migration algorithm.
Here is a quick summary of the proposal/idea.
- finer control on migration result
- possibility of specifying what features cannot fail their initialization on the dst host during migration. Migration should not succeed if any of them fails. - optional: each one of those features should be able to provide a deinit function to cleanup resources on the dst host if migration fails.
This functionality would come in useful for the (NIC) set port profile feature VDP (802.1Qbg/1Qbh), but what I propose is a generic config option / API that can be used by any feature.
And now the details.
---------------------------------------------- enhancement: finer control on migration result ----------------------------------------------
There are different reasons why a VM may need (or be forced) to migrate. You can classify the types of the migrations also based on different semantics. For simplicity I'll classify them into two categories, based on how important it is for the VM to migrate as fast as possible:
(1) It IS important
In this case, whether the VM will not be able to (temporarily) make use of certain resources (for example the network) on the dst host, is not that important, because the completion of the migration is considered higher priority. A possible scenario could be a server that must migrate ASAP because of a disaster/emergency.
(2) It IS NOT important
I can think of a VM whose applications/servers need a network connection in order to work properly. Losing such network connectivity as a consequence of a migration would not be acceptable (or highly undesirable).
Given the case (2) above, I have a comment about the Finish step, with regards to the port profile (VDP) codepath.
The call to
qemuMigrationVPAssociatePortProfile
in qemuMigrationFinish
can fail, but its result (success or failure) does not influence the result of the migration Finish step (it was already like this in migration V2). I *believe* the underlying problem is Qemu's switch-over. Once Qemu decides that the migration was successful, Qemu on the source side dies and continues running on the destination side. I don't think there are more handshakes foreseen with higher layers that this could be reversed or the switch-over delayed, but correct me if I am wrong... Actually I think this is not what happens in migration V3. My understanding is this:
- the qemu cmdline built by Libvirt on the dst host during Prepare3 includes the "-S" option (ie no autostart)
- the VM on the dst host does not start running until libvirt calls qemuProcessStartCPUs in the Finish3 step. This fn simply sends the "-cont" cmd to the monitor to start the VM/CPUs. That's correct, but it's doing this already in v2. The non-autostart (-S) corresponds to Qemu's autostart here (migration.c):
    void process_incoming_migration(QEMUFile *f)
    {
        if (qemu_loadvm_state(f) < 0) {
            fprintf(stderr, "load of migration failed\n");
            exit(0);
        }
        qemu_announce_self();
        DPRINTF("successfully loaded vm state\n");

        incoming_expected = false;

        if (autostart)
            vm_start();
    }

and simply doesn't start the VM. After this function is called all sockets are closed and the communication with the source host is cut. I don't think it allows for fall-back at this point. Rather we may need a 'wait' option for migration and before the

    qemu_put_byte(f, QEMU_VM_EOF);

in qemu_savevm_state_complete() sync with the monitor and either wait for something like migrate_finish or migrate_cancel.
Regards, Stefan
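A rough sketch of the 'wait' idea Stefan suggests, with hypothetical hook names (wait_for_migration_ack() and MIGRATE_ACK_CANCEL do not exist in QEMU; they stand in for whatever monitor handshake would be added):

    /* Hypothetical change inside qemu_savevm_state_complete():
     * block before emitting the end-of-stream marker until the
     * management layer acknowledges or cancels. */
    if (wait_for_migration_ack(mon) == MIGRATE_ACK_CANCEL)
        return -1;                 /* abort: never send QEMU_VM_EOF */
    qemu_put_byte(f, QEMU_VM_EOF);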

On Thu, Apr 21, 2011 at 07:37:30AM -0400, Stefan Berger wrote:
On 04/20/2011 11:38 PM, Christian Benvenuti (benve) wrote:
On 04/20/2011 05:28 PM, Christian Benvenuti (benve) wrote:
Daniel, I looked at the patch-set you sent out on the 2/9/11
[libvirt] [PATCH 0/6] Introduce a new migration protocol to QEMU driver http://www.mail-archive.com/libvir-list@redhat.com/msg33223.html
What is the status of this new migration protocol? Is there any pending issue blocking its integration?
I would like to propose an RFC enhancement to the migration algorithm.
Here is a quick summary of the proposal/idea.
- finer control on migration result
- possibility of specifying what features cannot fail their initialization on the dst host during migration. Migration should not succeed if any of them fails. - optional: each one of those features should be able to provide a deinit function to cleanup resources on the dst host if migration fails.
This functionality would come in useful for the (NIC) set port profile feature VDP (802.1Qbg/1Qbh), but what I propose is a generic config option / API that can be used by any feature.
And now the details.
---------------------------------------------- enhancement: finer control on migration result ----------------------------------------------
There are different reasons why a VM may need (or be forced) to migrate. You can classify the types of the migrations also based on different semantics. For simplicity I'll classify them into two categories, based on how important it is for the VM to migrate as fast as possible:
(1) It IS important
In this case, whether the VM will not be able to (temporarily) make use of certain resources (for example the network) on the dst host, is not that important, because the completion of the migration is considered higher priority. A possible scenario could be a server that must migrate ASAP because of a disaster/emergency.
(2) It IS NOT important
I can think of a VM whose applications/servers need a network connection in order to work properly. Losing such network connectivity as a consequence of a migration would not be acceptable (or highly undesirable).
Given the case (2) above, I have a comment about the Finish step, with regards to the port profile (VDP) codepath.
The call to
qemuMigrationVPAssociatePortProfile
in qemuMigrationFinish
can fail, but its result (success or failure) does not influence the result of the migration Finish step (it was already like this in migration V2). I *believe* the underlying problem is Qemu's switch-over. Once Qemu decides that the migration was successful, Qemu on the source side dies and continues running on the destination side. I don't think there are more handshakes foreseen with higher layers that this could be reversed or the switch-over delayed, but correct me if I am wrong... Actually I think this is not what happens in migration V3. My understanding is this:
- the qemu cmdline built by Libvirt on the dst host during Prepare3 includes the "-S" option (ie no autostart)
- the VM on the dst host does not start running until libvirt calls qemuProcessStartCPUs in the Finish3 step. This fn simply sends the "-cont" cmd to the monitor to start the VM/CPUs. That's correct, but it's doing this already in v2. The non-autostart (-S) corresponds to Qemu's autostart here (migration.c):
    void process_incoming_migration(QEMUFile *f)
    {
        if (qemu_loadvm_state(f) < 0) {
            fprintf(stderr, "load of migration failed\n");
            exit(0);
        }
        qemu_announce_self();
        DPRINTF("successfully loaded vm state\n");

        incoming_expected = false;

        if (autostart)
            vm_start();
    }
and simply doesn't start the VM. After this function is called all sockets are closed and the communication with the source host is cut. I don't think it allows for fall-back at this point.
Sure it does. As long as the destination QEMU CPUs have not been started, you can fallback by simply killing the dest QEMU and restarting CPUs on the src QEMU.
Rather we may need a 'wait' option for migration and before the
qemu_put_byte(f, QEMU_VM_EOF);
in qemu_savevm_state_complete() sync with the monitor and either wait for something like migrate_finish or migrate_cancel.
The real problem is that, while we can tell from 'info migrate' on the src when the src has finished sending all data, there is no way to ask the dest QEMU when it has finished receiving all data. So libvirt assumes that 'src finished sending' == success, and will attempt to start the dst QEMU CPUs. As raised many times in the past, we need 'info migrate' to work on the destination too, in order to query success/fail. And ideally we need async events emitted when migration completes, so we don't have to poll on 'info migrate' every 50ms. Daniel -- |: http://berrange.com -o- http://www.flickr.com/photos/dberrange/ :| |: http://libvirt.org -o- http://virt-manager.org :| |: http://autobuild.org -o- http://search.cpan.org/~danberr/ :| |: http://entangle-photo.org -o- http://live.gnome.org/gtk-vnc :|
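For reference, the source-side polling Daniel describes reads output like the following from 'info migrate' on the source monitor (illustrative values; the exact fields vary by QEMU version):

    (qemu) info migrate
    Migration status: active
    transferred ram: 483072 kbytes
    remaining ram: 319232 kbytes
    total ram: 1057152 kbytes
    (qemu) info migrate
    Migration status: completed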

On 04/21/2011 07:43 AM, Daniel P. Berrange wrote:
On Thu, Apr 21, 2011 at 07:37:30AM -0400, Stefan Berger wrote:
and simply doesn't start the VM. After this function is called all sockets are closed and the communication with the source host is cut. I don't think it allows for fall-back at this point.
Sure it does. As long as the destination QEMU CPUs have not been started, you can fallback by simply killing the dest QEMU and restarting CPUs on the src QEMU.
FWIW, I did a test and disabled the starting of the CPUs on the destination side and did a sleep() instead. Before the sleep() was over the Qemu on the source side had already disappeared. Stefan

On Thu, Apr 21, 2011 at 09:33:42AM -0400, Stefan Berger wrote:
On 04/21/2011 07:43 AM, Daniel P. Berrange wrote:
On Thu, Apr 21, 2011 at 07:37:30AM -0400, Stefan Berger wrote:
and simply doesn't start the VM. After this function is called all sockets are closed and the communication with the source host is cut. I don't think it allows for fall-back at this point.
Sure it does. As long as the destination QEMU CPUs have not been started, you can fallback by simply killing the dest QEMU and restarting CPUs on the src QEMU.
FWIW, I did a test and disabled the starting of the CPUs on the destination side and did a sleep() instead. Before the sleep() was over the Qemu on the source side had already disappeared.
That is with the old v2 migration protocol. In the new v3 protocol that has changed to work as I described. Daniel -- |: http://berrange.com -o- http://www.flickr.com/photos/dberrange/ :| |: http://libvirt.org -o- http://virt-manager.org :| |: http://autobuild.org -o- http://search.cpan.org/~danberr/ :| |: http://entangle-photo.org -o- http://live.gnome.org/gtk-vnc :|
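In the v3 flow that fallback becomes the source-side Confirm step. Schematically, using the process helpers this series introduces (signatures approximated; this is an assumption-level sketch, not the actual qemu driver code):

    /* Src: Confirm3 - 'cancelled' is set when the dst failed to take over */
    if (cancelled) {
        /* dst never resumed: restart the CPUs on the source guest */
        qemuProcessStartCPUs(driver, vm, conn);
    } else {
        /* dst is running: tear down the source guest */
        qemuProcessStop(driver, vm, 1);
    }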

On 04/21/2011 10:24 AM, Daniel P. Berrange wrote:
On Thu, Apr 21, 2011 at 09:33:42AM -0400, Stefan Berger wrote:
On 04/21/2011 07:43 AM, Daniel P. Berrange wrote:
On Thu, Apr 21, 2011 at 07:37:30AM -0400, Stefan Berger wrote:
and simply doesn't start the VM. After this function is called all sockets are closed and the communication with the source host is cut. I don't think it allows for fall-back at this point.
Sure it does. As long as the destination QEMU CPUs have not been started, you can fallback by simply killing the dest QEMU and restarting CPUs on the src QEMU.
FWIW, I did a test and disabled the starting of the CPUs on the destination side and did a sleep() instead. Before the sleep() was over the Qemu on the source side had already disappeared.
That is with the old v2 migration protocol. In the new v3 protocol that has changed to work as I described. Daniel
Hm... From what I see in the Qemu code and from the tracing I have done, Qemu goes right from sending out the last byte in qemu_savevm_state_complete() to closing the sockets in migrate_fd_cleanup() with this sequence in migrate_fd_put_ready(). Also the receiving side attempts to receive all bytes in process_incoming_migration() and then goes straight to closing the sockets with this sequence in tcp_accept_incoming_migration(). This means that once migration has completely sent or received all bytes, the connection between the two Qemus is cut and there is no waiting for starting the CPUs or something like that. If, relative to the Qemu migration, the association of profiles returns a negative result after Qemu has sent all bytes, it's too late to kill the destination and fall back to the source.
Stefan

-----Original Message----- From: Stefan Berger [mailto:stefanb@linux.vnet.ibm.com] Sent: Thursday, April 21, 2011 6:34 AM To: Daniel P. Berrange Cc: Christian Benvenuti (benve); eblake@redhat.com; laine@laine.org; chrisw@redhat.com; libvir-list@redhat.com; David Wang (dwang2); Roopa Prabhu (roprabhu); Gerhard Stenzel; Jens Osterkamp; Anthony Liguori Subject: Re: [libvirt] [PATCH 3/6] Introduce yet another migration version in API.
On 04/21/2011 07:43 AM, Daniel P. Berrange wrote:
On Thu, Apr 21, 2011 at 07:37:30AM -0400, Stefan Berger wrote:
and simply doesn't start the VM. After this function is called all sockets are closed and the communication with the source host is cut. I don't think it allows for fall-back at this point.
Sure it does. As long as the destination QEMU CPUs have not been started, you can fallback by simply killing the dest QEMU and restarting CPUs on the src QEMU.
FWIW, I did a test and disabled the starting of the CPUs on the destination side and did a sleep() instead. Before the sleep() was over the Qemu on the source side had already disappeared.
Did you test this with migration V2 or migration V3? I think what you describe is V2 (V3 now is different):

- Migration V2

    SRC HOST                                DST HOST
       |
       |- dump XML
       |                                    (PREPARE)
       +----------------------------------> Start empty VM
       |
       | (PERFORM)
       |- migrate cmd to monitor
       |- kill VM
       |                                    (FINISH)
       +----------------------------------> Start CPU

- Migration V3

    SRC HOST                                DST HOST
       |
       | (BEGIN)
       |- dumpxml
       |                                    (PREPARE)
       +----------------------------------> Start empty VM
       |
       | (PERFORM)
       |- migrate cmd to monitor
       |  (src CPU is now paused)
       |
       |                                    (FINISH)
       +----------------------------------> Start CPU
       |
       | (CONFIRM)
       |- if FINISH succeeded: Kill src VM
       |- if FINISH failed   : Run src VM

/Christian

Daniel, did you have a chance to look at the change I proposed as part of your new V3 migration API? (https://www.redhat.com/archives/libvir-list/2011-April/msg00978.html) Is there any plan to address the limitation raised in this 3D about the fact that the src host does not know when the dst host (qemu) has received all the data in the perform step? Also, what about the change I proposed, which would allow the admin to tell libvirt what initializations (=features) must succeed on the dst host for the migration to be considered successful? (I'll be happy to help if needed) /Chris

-----Original Message----- From: Daniel P. Berrange [mailto:berrange@redhat.com] Sent: Thursday, April 21, 2011 4:44 AM To: Stefan Berger Cc: Christian Benvenuti (benve); eblake@redhat.com; laine@laine.org; chrisw@redhat.com; libvir-list@redhat.com; David Wang (dwang2); Roopa Prabhu (roprabhu); Gerhard Stenzel; Jens Osterkamp; Anthony Liguori Subject: Re: [libvirt] [PATCH 3/6] Introduce yet another migration version in API.
On Thu, Apr 21, 2011 at 07:37:30AM -0400, Stefan Berger wrote:
On 04/20/2011 11:38 PM, Christian Benvenuti (benve) wrote:
On 04/20/2011 05:28 PM, Christian Benvenuti (benve) wrote:
Daniel, I looked at the patch-set you sent out on the 2/9/11
[libvirt] [PATCH 0/6] Introduce a new migration protocol to QEMU driver http://www.mail-archive.com/libvir-list@redhat.com/msg33223.html
What is the status of this new migration protocol? Is there any pending issue blocking its integration?
I would like to propose an RFC enhancement to the migration algorithm.
Here is a quick summary of the proposal/idea.
- finer control on migration result
- possibility of specifying what features cannot fail their initialization on the dst host during migration. Migration should not succeed if any of them fails. - optional: each one of those features should be able to provide a deinit function to cleanup resources on the dst host if migration fails.
This functionality would come in useful for the (NIC) set port profile feature VDP (802.1Qbg/1Qbh), but what I propose is a generic config option / API that can be used by any feature.
And now the details.
---------------------------------------------- enhancement: finer control on migration result ----------------------------------------------
There are different reasons why a VM may need (or be forced) to migrate. You can classify the types of the migrations also based on different semantics. For simplicity I'll classify them into two categories, based on how important it is for the VM to migrate as fast as possible:
(1) It IS important
In this case, whether the VM will not be able to (temporarily) make use of certain resources (for example the network) on the dst host, is not that important, because the completion of the migration is considered higher priority. A possible scenario could be a server that must migrate ASAP because of a disaster/emergency.
(2) It IS NOT important
I can think of a VM whose applications/servers need a network connection in order to work properly. Losing such network connectivity as a consequence of a migration would not be acceptable (or highly undesirable).
Given the case (2) above, I have a comment about the Finish step, with regards to the port profile (VDP) codepath.
The call to
qemuMigrationVPAssociatePortProfile
in qemuMigrationFinish
can fail, but its result (success or failure) does not influence the result of the migration Finish step (it was already like this in migration V2).
I *believe* the underlying problem is Qemu's switch-over. Once Qemu decides that the migration was successful, Qemu on the source side dies and continues running on the destination side. I don't think there are more handshakes foreseen with higher layers that this could be reversed or the switch-over delayed, but correct me if I am wrong... Actually I think this is not what happens in migration V3. My understanding is this:
- the qemu cmdline built by Libvirt on the dst host during Prepare3 includes the "-S" option (ie no autostart)
- the VM on the dst host does not start running until libvirt calls qemuProcessStartCPUs in the Finish3 step. This fn simply sends the "-cont" cmd to the monitor to start the VM/CPUs. That's correct, but it's doing this already in v2. The non-autostart (-S) corresponds to Qemu's autostart here (migration.c):
    void process_incoming_migration(QEMUFile *f)
    {
        if (qemu_loadvm_state(f) < 0) {
            fprintf(stderr, "load of migration failed\n");
            exit(0);
        }
        qemu_announce_self();
        DPRINTF("successfully loaded vm state\n");

        incoming_expected = false;

        if (autostart)
            vm_start();
    }
and simply doesn't start the VM. After this function is called all sockets are closed and the communication with the source host is cut. I don't think it allows for fall-back at this point.
Sure it does. As long as the destination QEMU CPUs have not been started, you can fallback by simply killing the dest QEMU and restarting CPUs on the src QEMU.
Rather we may need a 'wait' option for migration and before the
qemu_put_byte(f, QEMU_VM_EOF);
in qemu_savevm_state_complete() sync with the monitor and either wait for something like migrate_finish or migrate_cancel.
The real problem is that, while we can tell from 'info migrate' on the src when the src has finished sending all data, there is no way to ask the dest QEMU when it has finished receiving all data.
So libvirt assumes that 'src finished sending' == success, and will attempt to start the dst QEMU CPUs. As raised many times in the past, we need 'info migrate' to work on the destination too, in order to query success/fail. And ideally need async events emitted when migration completes, so we don't have to poll on 'info migrate' every 50ms
What is the reason why this point ('info migrate' on the dst host) was raised many times in the past but was never implemented? Is there any technical reason? Assuming the interval between the moment the src host finishes sending and the dst host finishes receiving is not too big (which is a fair assumption I guess), libvirt on the dst host could block on that condition (ie wait for 'info migrate' to say "rx all" on the dst host) at the beginning of Finish3. Is it doable? /Christian

On Wed, Apr 20, 2011 at 10:38:40PM -0500, Christian Benvenuti (benve) wrote:
On 04/20/2011 05:28 PM, Christian Benvenuti (benve) wrote:
Daniel, I looked at the patch-set you sent out on the 2/9/11
[libvirt] [PATCH 0/6] Introduce a new migration protocol to QEMU driver http://www.mail-archive.com/libvir-list@redhat.com/msg33223.html
What is the status of this new migration protocol? Is there any pending issue blocking its integration?
I would like to propose an RFC enhancement to the migration algorithm.
Here is a quick summary of the proposal/idea.
- finer control on migration result
- possibility of specifying what features cannot fail their initialization on the dst host during migration. Migration should not succeed if any of them fails. - optional: each one of those features should be able to provide a deinit function to cleanup resources on the dst host if migration fails.
This functionality would come in useful for the (NIC) set port profile feature VDP (802.1Qbg/1Qbh), but what I propose is a generic config option / API that can be used by any feature.
And now the details.
---------------------------------------------- enhancement: finer control on migration result ----------------------------------------------
There are different reasons why a VM may need (or be forced) to migrate. You can classify the types of the migrations also based on different semantics. For simplicity I'll classify them into two categories, based on how important it is for the VM to migrate as fast as possible:
(1) It IS important
In this case, whether the VM will not be able to (temporarily) make use of certain resources (for example the network) on the dst host, is not that important, because the completion of the migration is considered higher priority. A possible scenario could be a server that must migrate ASAP because of a disaster/emergency.
(2) It IS NOT important
I can think of a VM whose applications/servers need a network connection in order to work properly. Losing such network connectivity as a consequence of a migration would not be acceptable (or highly undesirable).
Given the case (2) above, I have a comment about the Finish step, with regards to the port profile (VDP) codepath.
The call to
qemuMigrationVPAssociatePortProfile
in qemuMigrationFinish
can fail, but its result (success or failure) does not influence the result of the migration Finish step (it was already like this in migration V2).
I *believe* the underlying problem is Qemu's switch-over. Once Qemu decides that the migration was successful, Qemu on the source side dies and continues running on the destination side. I don't think there are more handshakes foreseen with higher layers that this could be reversed or the switch-over delayed, but correct me if I am wrong...
Actually I think this is not what happens in migration V3. My understanding is this:
- the qemu cmdline built by Libvirt on the dst host during Prepare3 includes the "-S" option (ie no autostart)
- the VM on the dst host does not start running until libvirt calls qemuProcessStartCPUs in the Finish3 step. This fn simply sends the "-cont" cmd to the monitor to start the VM/CPUs.
If I am right, libvirt does have full control on how/when to start the CPU on the dst host; it is not QEMU that does it.
That is correct. It is libvirt that decides when to kill the src QEMU, and in theory when to start the CPUs on the dst. In practice we can't reliably determine the latter until QEMU gives us more info, so we just start the CPUs once the src has finished sending data.
The only thing libvirt does not control is when to pause the VM on the src host: QEMU does it during the stage-2 of the live-ram-copy based on the max_downtime config. However I do not think this represents a problem.
Correct, that's no problem. The key thing is that libvirt decides when to start dst CPUs & kill src QEMU process. Regards, Daniel -- |: http://berrange.com -o- http://www.flickr.com/photos/dberrange/ :| |: http://libvirt.org -o- http://virt-manager.org :| |: http://autobuild.org -o- http://search.cpan.org/~danberr/ :| |: http://entangle-photo.org -o- http://live.gnome.org/gtk-vnc :|

I assume that 2 associations with the same profile are possible with 802.1Qbg and Qbh.
According to the IEEE standard, it is possible to have a transient period during which you have two associations and therefore standard-compliant implementations are supposed to be able to handle it. /Christian

On Wed, Apr 20, 2011 at 04:28:12PM -0500, Christian Benvenuti (benve) wrote:
Daniel, I looked at the patch-set you sent out on the 2/9/11
[libvirt] [PATCH 0/6] Introduce a new migration protocol to QEMU driver http://www.mail-archive.com/libvir-list@redhat.com/msg33223.html
What is the status of this new migration protocol? Is there any pending issue blocking its integration?
I would like to propose an RFC enhancement to the migration algorithm.
Here is a quick summary of the proposal/idea.
- finer control on migration result
- possibility of specifying what features cannot fail their initialization on the dst host during migration. Migration should not succeed if any of them fails. - optional: each one of those features should be able to provide a deinit function to cleanup resources on the dst host if migration fails.
I'm not really very convinced that allowing things to fail during migration is useful, not least from the POV of the app determining just what worked vs failed. IMHO, migration should be atomic and only succeed if everything related to the guest succeeds. If we want to support a case where the dst can't connect to the same network, then we should add an API that lets us change the network backend on the fly. NB, this is different from NIC hotplug/unplug, in that the guest device never changes. We merely change how the guest is connected to the host. So, if you have a guest with a NIC configured using VEPA, then you can re-configure it to use a 'no op' (aka /dev/null) NIC backend, and then perform the migration.
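To illustrate the idea (purely a sketch; the XML fragment, MAC address and flag below are assumptions, and virDomainUpdateDeviceFlags is just one plausible vehicle for such a backend swap):

    /* Hypothetical: re-point the host side of an existing NIC at a
     * do-nothing backend before migrating; the guest-visible device
     * (and its MAC) never changes */
    const char *nullBackend =
        "<interface type='ethernet'>"
        "  <mac address='52:54:00:12:34:56'/>"
        "  <model type='virtio'/>"
        "</interface>";

    if (virDomainUpdateDeviceFlags(dom, nullBackend,
                                   VIR_DOMAIN_DEVICE_MODIFY_LIVE) < 0)
        return -1;   /* could not detach from VEPA; don't migrate */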
----------------------------------------------
enhancement: finer control on migration result
----------------------------------------------
There are different reasons why a VM may need (or be forced) to migrate. Migrations can also be classified according to different semantics. For simplicity I'll classify them into two categories, based on how important it is for the VM to migrate as fast as possible:
(1) It IS important
In this case, the fact that the VM will temporarily be unable to make use of certain resources (for example the network) on the dst host is not that important, because the completion of the migration is considered the higher priority. A possible scenario could be a server that must migrate ASAP because of a disaster/emergency.
(2) It IS NOT important
I can think of a VM whose applications/servers need a network connection in order to work properly. Losing such network connectivity as a consequence of a migration would not be acceptable (or would be highly undesirable).
Given the case (2) above, I have a comment about the Finish step, with regards to the port profile (VDP) codepath.
The call to
qemuMigrationVPAssociatePortProfile
in qemuMigrationFinish
can fail, but its result (success or failure) does not influence the result of the migration Finish step (it was already like this in migration V2). It is therefore possible for a VM to lose its network connectivity after a (successful) migration.
That is a clear bug in our code - something that can fail during migration should cause migration to abort, leaving the guest on the original host unchanged.
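A minimal sketch of the fix being called for (signatures illustrative); the point is only that a failed association aborts Finish3 while the source guest can still be resumed, since v3 only kills the src QEMU in Confirm3:

    /* In qemuMigrationFinish: treat a failed association as fatal */
    if (qemuMigrationVPAssociatePortProfile(driver, vm) < 0) {
        qemuProcessStop(driver, vm, 1);   /* tear down the dst VM */
        goto error;                       /* Finish3 reports failure */
    }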
BTW, would the new functionality being discussed in this thread
"RFC: virInterface change transaction API" http://www.redhat.com/archives/libvir-list/2011-April/msg00499.html
be able to provide the same configuration "atomicity" (ie, rollback in case of migration failure)? My understanding is that:
- Such a new framework would apply to (host) network config only. Even though it may cover the VDP (port profile) use case I mentioned above, it would not apply to other features that may need some kind of rollback after a migration failure.
Host NIC configuration from virInterface isn't really tied into the migration at all. It is something the mgmt app has to do on the source & dest hosts, before setting up any VMs, let alone getting to migration. Regards, Daniel

-----Original Message-----
From: Daniel P. Berrange [mailto:berrange@redhat.com]
Sent: Thursday, April 21, 2011 5:02 AM
To: Christian Benvenuti (benve)
Cc: eblake@redhat.com; stefanb@linux.vnet.ibm.com; laine@laine.org; chrisw@redhat.com; libvir-list@redhat.com; David Wang (dwang2); Roopa Prabhu (roprabhu)
Subject: Re: [libvirt] [PATCH 3/6] Introduce yet another migration version in API.
On Wed, Apr 20, 2011 at 04:28:12PM -0500, Christian Benvenuti (benve) wrote:
Daniel, I looked at the patch-set you sent out on 2/9/11:
[libvirt] [PATCH 0/6] Introduce a new migration protocol to QEMU driver http://www.mail-archive.com/libvir-list@redhat.com/msg33223.html
What is the status of this new migration protocol? Is there any pending issue blocking its integration?
I would like to propose an RFC enhancement to the migration algorithm.
Here is a quick summary of the proposal/idea.
- finer control on migration result
- possibility of specifying which features must not fail their initialization on the dst host during migration; migration should not succeed if any of them fails.
- optional: each of those features should be able to provide a deinit function to clean up resources on the dst host if migration fails.
I'm not really very convinced that allowing things to fail during migration is useful, not least from the POV of the app determining just what worked vs failed. IMHO, migration should be atomic and only succeed if everything related to the guest succeeds.
I agree, the migration should be atomic ... in most cases. However, the scenario I was referring to is different: in that scenario you want the migration to complete as fast as possible. Because of that, blocking on operations (such as net config) which may need several seconds to complete, and which could be taken care of at a later time, would not be desirable. In such a scenario you would be choosing between these two outcomes: 1) you may lose the VM because the migration does not complete fast enough; 2) you have a better chance of successfully moving the VM, but you may lose something like network connectivity. This loss may be acceptable (or less of a problem compared to the loss of the VM's run-time state), in the sense that mgmt can re-try initializing it.
If we want to support a case where the dst can't connect to the same network, then we should add an API that lets us change the network backend on the fly.
'on the fly' when/where? On the dst host at the end of the migration?
NB, this is different from NIC hotplug/unplug, in that the guest device never changes. We merely change how the guest is connected to the host. So, if you have a guest with a NIC configured using VEPA, then you can re-configure it to use a 'no op' (aka /dev/null) NIC backend, and then perform the migration.
Wouldn't it be better to let the migration try to migrate the net connection too, and, in case it failed, let mgmt re-try if configured to do so, based for example on a configuration policy (ie, "net persistent re-try 10s")?
----------------------------------------------
enhancement: finer control on migration result
----------------------------------------------
There are different reasons why a VM may need (or be forced) to migrate. Migrations can also be classified according to different semantics. For simplicity I'll classify them into two categories, based on how important it is for the VM to migrate as fast as possible:
(1) It IS important
In this case, the fact that the VM will temporarily be unable to make use of certain resources (for example the network) on the dst host is not that important, because the completion of the migration is considered the higher priority. A possible scenario could be a server that must migrate ASAP because of a disaster/emergency.
(2) It IS NOT important
I can think of a VM whose applications/servers need a network connection in order to work properly. Losing such network connectivity as a consequence of a migration would not be acceptable (or would be highly undesirable).
Given the case (2) above, I have a comment about the Finish step, with regards to the port profile (VDP) codepath.
The call to
qemuMigrationVPAssociatePortProfile
in qemuMigrationFinish
can fail, but its result (success or failure) does not influence the result of the migration Finish step (it was already like this in migration V2). It is therefore possible for a VM to lose its network connectivity after a (successful) migration.
That is a clear bug in our code - something that can fail during migration should cause migration to abort, leaving the guest on the original host unchanged.
I agree. However I believe there may be corner cases (like emergency scenarios) where it can make sense to relax this policy. /Christian
BTW, would the new functionality being discussed in this thread
"RFC: virInterface change transaction API" http://www.redhat.com/archives/libvir-list/2011-April/msg00499.html
be able to provide the same configuration "atomicity" (ie, rollback in case of migration failure)? My understanding is that:
- Such a new framework would apply to (host) network config only. Even though it may cover the VDP (port profile) use case I mentioned above, it would not apply to other features that may need some kind of rollback after a migration failure.
Host NIC configuration from virInterface isn't really tied into the migration at all. It is something the mgmt app has to do on the source & dest hosts, before setting up any VMs, let alone getting to migration.
Regards, Daniel
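For context, a sketch of the shape of the transaction API from that RFC; the names follow the RFC, and none of these calls exist in a libvirt release at the time of this thread:

    /* Snapshot host NIC config, reconfigure, then commit or roll back */
    if (virInterfaceChangeBegin(conn, 0) < 0)
        return -1;                           /* could not snapshot */

    if (reconfigureHostInterfaces(conn) < 0)     /* hypothetical helper */
        virInterfaceChangeRollback(conn, 0);     /* restore the snapshot */
    else
        virInterfaceChangeCommit(conn, 0);       /* make changes durable */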

* src/remote/remote_protocol.x: Define wire protocol for migration protocol v3 * daemon/remote.c: Server side dispatch * src/remote/remote_driver.c: Client side serialization * src/remote/remote_protocol.c, src/remote/remote_protocol.h, daemon/remote_dispatch_args.h, daemon/remote_dispatch_prototypes.h, daemon/remote_dispatch_ret.h, daemon/remote_dispatch_table.h: Re-generate files --- daemon/remote.c | 282 ++++++++++++++++++++++++++ daemon/remote_dispatch_args.h | 6 + daemon/remote_dispatch_prototypes.h | 48 +++++ daemon/remote_dispatch_ret.h | 5 + daemon/remote_dispatch_table.h | 30 +++ src/remote/remote_driver.c | 371 ++++++++++++++++++++++++++++++++++- src/remote/remote_protocol.c | 163 +++++++++++++++ src/remote/remote_protocol.h | 140 +++++++++++++ src/remote/remote_protocol.x | 79 ++++++++- 9 files changed, 1117 insertions(+), 7 deletions(-) diff --git a/daemon/remote.c b/daemon/remote.c index d53b466..6592c85 100644 --- a/daemon/remote.c +++ b/daemon/remote.c @@ -72,6 +72,7 @@ static virStorageVolPtr get_nonnull_storage_vol (virConnectPtr conn, remote_nonn static virSecretPtr get_nonnull_secret (virConnectPtr conn, remote_nonnull_secret secret); static virNWFilterPtr get_nonnull_nwfilter (virConnectPtr conn, remote_nonnull_nwfilter nwfilter); static virDomainSnapshotPtr get_nonnull_domain_snapshot (virDomainPtr domain, remote_nonnull_domain_snapshot snapshot); +static int make_domain (remote_domain *dom_dst, virDomainPtr dom_src); static void make_nonnull_domain (remote_nonnull_domain *dom_dst, virDomainPtr dom_src); static void make_nonnull_network (remote_nonnull_network *net_dst, virNetworkPtr net_src); static void make_nonnull_interface (remote_nonnull_interface *interface_dst, virInterfacePtr interface_src); @@ -7012,6 +7013,272 @@ remoteDispatchDomainOpenConsole(struct qemud_server *server ATTRIBUTE_UNUSED, } +static int +remoteDispatchDomainMigrateBegin3(struct qemud_server *server ATTRIBUTE_UNUSED, + struct qemud_client *client ATTRIBUTE_UNUSED, + virConnectPtr conn, + remote_message_header *hdr ATTRIBUTE_UNUSED, + remote_error *rerr, + remote_domain_migrate_begin3_args *args, + remote_domain_migrate_begin3_ret *ret) +{ + char *xml = NULL; + virDomainPtr dom; + char *dname; + char *cookieout = NULL; + int cookieoutlen = 0; + + dom = get_nonnull_domain (conn, args->dom); + if (dom == NULL) { + remoteDispatchConnError(rerr, conn); + return -1; + } + + dname = args->dname == NULL ? NULL : *args->dname; + + xml = virDomainMigrateBegin3(dom, + &cookieout, &cookieoutlen, + args->flags, dname, args->resource); + virDomainFree(dom); + if (xml == NULL) { + remoteDispatchConnError(rerr, conn); + return -1; + } + + /* remoteDispatchClientRequest will free cookie and + * the xml string if there is one. + */ + ret->cookie_out.cookie_out_len = cookieoutlen; + ret->cookie_out.cookie_out_val = cookieout; + ret->xml = xml; + + return 0; +} + + +static int +remoteDispatchDomainMigratePrepare3(struct qemud_server *server ATTRIBUTE_UNUSED, + struct qemud_client *client ATTRIBUTE_UNUSED, + virConnectPtr conn, + remote_message_header *hdr ATTRIBUTE_UNUSED, + remote_error *rerr, + remote_domain_migrate_prepare3_args *args, + remote_domain_migrate_prepare3_ret *ret) +{ + int r; + char *cookieout = NULL; + int cookieoutlen = 0; + char *uri_in; + char **uri_out; + char *dname; + CHECK_CONN (client); + + uri_in = args->uri_in == NULL ? NULL : *args->uri_in; + dname = args->dname == NULL ? NULL : *args->dname; + + /* Wacky world of XDR ... 
*/ + if (VIR_ALLOC(uri_out) < 0) { + remoteDispatchOOMError(rerr); + return -1; + } + + r = virDomainMigratePrepare3(conn, + args->cookie_in.cookie_in_val, + args->cookie_in.cookie_in_len, + &cookieout, &cookieoutlen, + uri_in, uri_out, + args->flags, dname, args->resource, + args->dom_xml); + if (r == -1) { + remoteDispatchConnError(rerr, conn); + return -1; + } + + /* remoteDispatchClientRequest will free cookie, uri_out and + * the string if there is one. + */ + ret->cookie_out.cookie_out_len = cookieoutlen; + ret->cookie_out.cookie_out_val = cookieout; + ret->uri_out = *uri_out == NULL ? NULL : uri_out; + + return 0; +} + +static int +remoteDispatchDomainMigratePrepareTunnel3(struct qemud_server *server ATTRIBUTE_UNUSED, + struct qemud_client *client, + virConnectPtr conn, + remote_message_header *hdr, + remote_error *rerr, + remote_domain_migrate_prepare_tunnel3_args *args, + remote_domain_migrate_prepare_tunnel3_ret *ret) +{ + int r; + char *dname; + char *cookieout = NULL; + int cookieoutlen = 0; + struct qemud_client_stream *stream; + CHECK_CONN (client); + + dname = args->dname == NULL ? NULL : *args->dname; + + stream = remoteCreateClientStream(conn, hdr); + if (!stream) { + remoteDispatchOOMError(rerr); + return -1; + } + + r = virDomainMigratePrepareTunnel3(conn, stream->st, + args->cookie_in.cookie_in_val, + args->cookie_in.cookie_in_len, + &cookieout, &cookieoutlen, + args->flags, dname, args->resource, + args->dom_xml); + if (r == -1) { + remoteFreeClientStream(client, stream); + remoteDispatchConnError(rerr, conn); + return -1; + } + + if (remoteAddClientStream(client, stream, 0) < 0) { + remoteDispatchConnError(rerr, conn); + virStreamAbort(stream->st); + remoteFreeClientStream(client, stream); + VIR_FREE(cookieout); + return -1; + } + + /* remoteDispatchClientRequest will free cookie + */ + ret->cookie_out.cookie_out_len = cookieoutlen; + ret->cookie_out.cookie_out_val = cookieout; + + return 0; +} + +static int +remoteDispatchDomainMigratePerform3(struct qemud_server *server ATTRIBUTE_UNUSED, + struct qemud_client *client ATTRIBUTE_UNUSED, + virConnectPtr conn, + remote_message_header *hdr ATTRIBUTE_UNUSED, + remote_error *rerr, + remote_domain_migrate_perform3_args *args, + remote_domain_migrate_perform3_ret *ret) +{ + int r; + virDomainPtr dom; + char *dname; + char *cookieout = NULL; + int cookieoutlen = 0; + + dom = get_nonnull_domain (conn, args->dom); + if (dom == NULL) { + remoteDispatchConnError(rerr, conn); + return -1; + } + + dname = args->dname == NULL ? 
NULL : *args->dname; + + r = virDomainMigratePerform3(dom, + args->cookie_in.cookie_in_val, + args->cookie_in.cookie_in_len, + &cookieout, &cookieoutlen, + args->uri, + args->flags, dname, args->resource); + virDomainFree (dom); + if (r == -1) { + remoteDispatchConnError(rerr, conn); + return -1; + } + + /* remoteDispatchClientRequest will free cookie + */ + ret->cookie_out.cookie_out_len = cookieoutlen; + ret->cookie_out.cookie_out_val = cookieout; + + return 0; +} + + +static int +remoteDispatchDomainMigrateFinish3(struct qemud_server *server ATTRIBUTE_UNUSED, + struct qemud_client *client ATTRIBUTE_UNUSED, + virConnectPtr conn, + remote_message_header *hdr ATTRIBUTE_UNUSED, + remote_error *rerr, + remote_domain_migrate_finish3_args *args, + remote_domain_migrate_finish3_ret *ret) +{ + virDomainPtr ddom = NULL; + char *cookieout = NULL; + int cookieoutlen = 0; + int rv; + CHECK_CONN (client); + + rv = virDomainMigrateFinish3(conn, args->dname, + args->cookie_in.cookie_in_val, + args->cookie_in.cookie_in_len, + &cookieout, &cookieoutlen, + args->uri, + args->flags, + args->cancelled, + &ddom); + if (rv < 0) { + remoteDispatchConnError(rerr, conn); + return -1; + } + + if (ddom) { + if (make_domain(&ret->ddom, ddom) < 0) { + remoteDispatchConnError(rerr, conn); + virDomainFree(ddom); + VIR_FREE(cookieout); + return -1; + } + virDomainFree(ddom); + } + + /* remoteDispatchClientRequest will free cookie + */ + ret->cookie_out.cookie_out_len = cookieoutlen; + ret->cookie_out.cookie_out_val = cookieout; + + return 0; +} + + +static int +remoteDispatchDomainMigrateConfirm3(struct qemud_server *server ATTRIBUTE_UNUSED, + struct qemud_client *client ATTRIBUTE_UNUSED, + virConnectPtr conn, + remote_message_header *hdr ATTRIBUTE_UNUSED, + remote_error *rerr, + remote_domain_migrate_confirm3_args *args, + void *ret ATTRIBUTE_UNUSED) +{ + int r; + virDomainPtr dom; + + dom = get_nonnull_domain (conn, args->dom); + if (dom == NULL) { + remoteDispatchConnError(rerr, conn); + return -1; + } + + r = virDomainMigrateConfirm3(dom, + args->cookie_in.cookie_in_val, + args->cookie_in.cookie_in_len, + args->flags, args->cancelled); + virDomainFree (dom); + if (r == -1) { + remoteDispatchConnError(rerr, conn); + return -1; + } + + return 0; +} + + /*----- Helpers. -----*/ /* get_nonnull_domain and get_nonnull_network turn an on-wire @@ -7077,6 +7344,21 @@ get_nonnull_domain_snapshot (virDomainPtr domain, remote_nonnull_domain_snapshot } /* Make remote_nonnull_domain and remote_nonnull_network. 
*/ +static int +make_domain (remote_domain *dom_dst, virDomainPtr dom_src) +{ + remote_domain rdom; + if (VIR_ALLOC(rdom) < 0) + return -1; + + rdom->id = dom_src->id; + rdom->name = strdup(dom_src->name); + memcpy(rdom->uuid, dom_src->uuid, VIR_UUID_BUFLEN); + + *dom_dst = rdom; + return 0; +} + static void make_nonnull_domain (remote_nonnull_domain *dom_dst, virDomainPtr dom_src) { diff --git a/daemon/remote_dispatch_args.h b/daemon/remote_dispatch_args.h index 57962d1..6603da4 100644 --- a/daemon/remote_dispatch_args.h +++ b/daemon/remote_dispatch_args.h @@ -172,3 +172,9 @@ remote_domain_open_console_args val_remote_domain_open_console_args; remote_domain_is_updated_args val_remote_domain_is_updated_args; remote_get_sysinfo_args val_remote_get_sysinfo_args; + remote_domain_migrate_begin3_args val_remote_domain_migrate_begin3_args; + remote_domain_migrate_prepare3_args val_remote_domain_migrate_prepare3_args; + remote_domain_migrate_prepare_tunnel3_args val_remote_domain_migrate_prepare_tunnel3_args; + remote_domain_migrate_perform3_args val_remote_domain_migrate_perform3_args; + remote_domain_migrate_finish3_args val_remote_domain_migrate_finish3_args; + remote_domain_migrate_confirm3_args val_remote_domain_migrate_confirm3_args; diff --git a/daemon/remote_dispatch_prototypes.h b/daemon/remote_dispatch_prototypes.h index e59701a..602a052 100644 --- a/daemon/remote_dispatch_prototypes.h +++ b/daemon/remote_dispatch_prototypes.h @@ -418,6 +418,22 @@ static int remoteDispatchDomainMemoryStats( remote_error *err, remote_domain_memory_stats_args *args, remote_domain_memory_stats_ret *ret); +static int remoteDispatchDomainMigrateBegin3( + struct qemud_server *server, + struct qemud_client *client, + virConnectPtr conn, + remote_message_header *hdr, + remote_error *err, + remote_domain_migrate_begin3_args *args, + remote_domain_migrate_begin3_ret *ret); +static int remoteDispatchDomainMigrateConfirm3( + struct qemud_server *server, + struct qemud_client *client, + virConnectPtr conn, + remote_message_header *hdr, + remote_error *err, + remote_domain_migrate_confirm3_args *args, + void *ret); static int remoteDispatchDomainMigrateFinish( struct qemud_server *server, struct qemud_client *client, @@ -434,6 +450,14 @@ static int remoteDispatchDomainMigrateFinish2( remote_error *err, remote_domain_migrate_finish2_args *args, remote_domain_migrate_finish2_ret *ret); +static int remoteDispatchDomainMigrateFinish3( + struct qemud_server *server, + struct qemud_client *client, + virConnectPtr conn, + remote_message_header *hdr, + remote_error *err, + remote_domain_migrate_finish3_args *args, + remote_domain_migrate_finish3_ret *ret); static int remoteDispatchDomainMigratePerform( struct qemud_server *server, struct qemud_client *client, @@ -442,6 +466,14 @@ static int remoteDispatchDomainMigratePerform( remote_error *err, remote_domain_migrate_perform_args *args, void *ret); +static int remoteDispatchDomainMigratePerform3( + struct qemud_server *server, + struct qemud_client *client, + virConnectPtr conn, + remote_message_header *hdr, + remote_error *err, + remote_domain_migrate_perform3_args *args, + remote_domain_migrate_perform3_ret *ret); static int remoteDispatchDomainMigratePrepare( struct qemud_server *server, struct qemud_client *client, @@ -458,6 +490,14 @@ static int remoteDispatchDomainMigratePrepare2( remote_error *err, remote_domain_migrate_prepare2_args *args, remote_domain_migrate_prepare2_ret *ret); +static int remoteDispatchDomainMigratePrepare3( + struct qemud_server *server, + struct 
qemud_client *client, + virConnectPtr conn, + remote_message_header *hdr, + remote_error *err, + remote_domain_migrate_prepare3_args *args, + remote_domain_migrate_prepare3_ret *ret); static int remoteDispatchDomainMigratePrepareTunnel( struct qemud_server *server, struct qemud_client *client, @@ -466,6 +506,14 @@ static int remoteDispatchDomainMigratePrepareTunnel( remote_error *err, remote_domain_migrate_prepare_tunnel_args *args, void *ret); +static int remoteDispatchDomainMigratePrepareTunnel3( + struct qemud_server *server, + struct qemud_client *client, + virConnectPtr conn, + remote_message_header *hdr, + remote_error *err, + remote_domain_migrate_prepare_tunnel3_args *args, + remote_domain_migrate_prepare_tunnel3_ret *ret); static int remoteDispatchDomainMigrateSetMaxDowntime( struct qemud_server *server, struct qemud_client *client, diff --git a/daemon/remote_dispatch_ret.h b/daemon/remote_dispatch_ret.h index 78e5469..f793ac2 100644 --- a/daemon/remote_dispatch_ret.h +++ b/daemon/remote_dispatch_ret.h @@ -139,3 +139,8 @@ remote_domain_get_vcpus_flags_ret val_remote_domain_get_vcpus_flags_ret; remote_domain_is_updated_ret val_remote_domain_is_updated_ret; remote_get_sysinfo_ret val_remote_get_sysinfo_ret; + remote_domain_migrate_begin3_ret val_remote_domain_migrate_begin3_ret; + remote_domain_migrate_prepare3_ret val_remote_domain_migrate_prepare3_ret; + remote_domain_migrate_prepare_tunnel3_ret val_remote_domain_migrate_prepare_tunnel3_ret; + remote_domain_migrate_perform3_ret val_remote_domain_migrate_perform3_ret; + remote_domain_migrate_finish3_ret val_remote_domain_migrate_finish3_ret; diff --git a/daemon/remote_dispatch_table.h b/daemon/remote_dispatch_table.h index 5d27390..e01aec7 100644 --- a/daemon/remote_dispatch_table.h +++ b/daemon/remote_dispatch_table.h @@ -1022,3 +1022,33 @@ .args_filter = (xdrproc_t) xdr_remote_get_sysinfo_args, .ret_filter = (xdrproc_t) xdr_remote_get_sysinfo_ret, }, +{ /* DomainMigrateBegin3 => 204 */ + .fn = (dispatch_fn) remoteDispatchDomainMigrateBegin3, + .args_filter = (xdrproc_t) xdr_remote_domain_migrate_begin3_args, + .ret_filter = (xdrproc_t) xdr_remote_domain_migrate_begin3_ret, +}, +{ /* DomainMigratePrepare3 => 205 */ + .fn = (dispatch_fn) remoteDispatchDomainMigratePrepare3, + .args_filter = (xdrproc_t) xdr_remote_domain_migrate_prepare3_args, + .ret_filter = (xdrproc_t) xdr_remote_domain_migrate_prepare3_ret, +}, +{ /* DomainMigratePrepareTunnel3 => 206 */ + .fn = (dispatch_fn) remoteDispatchDomainMigratePrepareTunnel3, + .args_filter = (xdrproc_t) xdr_remote_domain_migrate_prepare_tunnel3_args, + .ret_filter = (xdrproc_t) xdr_remote_domain_migrate_prepare_tunnel3_ret, +}, +{ /* DomainMigratePerform3 => 207 */ + .fn = (dispatch_fn) remoteDispatchDomainMigratePerform3, + .args_filter = (xdrproc_t) xdr_remote_domain_migrate_perform3_args, + .ret_filter = (xdrproc_t) xdr_remote_domain_migrate_perform3_ret, +}, +{ /* DomainMigrateFinish3 => 208 */ + .fn = (dispatch_fn) remoteDispatchDomainMigrateFinish3, + .args_filter = (xdrproc_t) xdr_remote_domain_migrate_finish3_args, + .ret_filter = (xdrproc_t) xdr_remote_domain_migrate_finish3_ret, +}, +{ /* DomainMigrateConfirm3 => 209 */ + .fn = (dispatch_fn) remoteDispatchDomainMigrateConfirm3, + .args_filter = (xdrproc_t) xdr_remote_domain_migrate_confirm3_args, + .ret_filter = (xdrproc_t) xdr_void, +}, diff --git a/src/remote/remote_driver.c b/src/remote/remote_driver.c index 464222c..3b36cc5 100644 --- a/src/remote/remote_driver.c +++ b/src/remote/remote_driver.c @@ -243,6 +243,7 @@ static 
int remoteAuthPolkit (virConnectPtr conn, struct private_data *priv, int virReportErrorHelper(NULL, VIR_FROM_REMOTE, code, __FILE__, \ __FUNCTION__, __LINE__, __VA_ARGS__) +static virDomainPtr get_domain (virConnectPtr conn, remote_domain domain); static virDomainPtr get_nonnull_domain (virConnectPtr conn, remote_nonnull_domain domain); static virNetworkPtr get_nonnull_network (virConnectPtr conn, remote_nonnull_network network); static virNWFilterPtr get_nonnull_nwfilter (virConnectPtr conn, remote_nonnull_nwfilter nwfilter); @@ -3264,6 +3265,7 @@ error: goto done; } + static virDomainPtr remoteDomainMigrateFinish2 (virConnectPtr dconn, const char *dname, @@ -3301,6 +3303,7 @@ done: return ddom; } + static int remoteListDefinedDomains (virConnectPtr conn, char **const names, int maxnames) { @@ -9439,6 +9442,346 @@ done: return rv; } + +static char * +remoteDomainMigrateBegin3(virDomainPtr domain, + char **cookieout, + int *cookieoutlen, + unsigned long flags, + const char *dname, + unsigned long resource) +{ + char *rv = NULL; + remote_domain_migrate_begin3_args args; + remote_domain_migrate_begin3_ret ret; + struct private_data *priv = domain->conn->privateData; + + remoteDriverLock(priv); + + memset(&args, 0, sizeof(args)); + memset(&ret, 0, sizeof(ret)); + + make_nonnull_domain (&args.dom, domain); + args.flags = flags; + args.dname = dname == NULL ? NULL : (char **) &dname; + args.resource = resource; + + if (call (domain->conn, priv, 0, REMOTE_PROC_DOMAIN_MIGRATE_BEGIN3, + (xdrproc_t) xdr_remote_domain_migrate_begin3_args, (char *) &args, + (xdrproc_t) xdr_remote_domain_migrate_begin3_ret, (char *) &ret) == -1) + goto done; + + if (ret.cookie_out.cookie_out_len > 0) { + if (!cookieout || !cookieoutlen) { + remoteError(VIR_ERR_INTERNAL_ERROR, "%s", + _("caller ignores cookieout or cookieoutlen")); + goto error; + } + *cookieout = ret.cookie_out.cookie_out_val; /* Caller frees. */ + *cookieoutlen = ret.cookie_out.cookie_out_len; + } + + rv = ret.xml; /* caller frees */ + +done: + remoteDriverUnlock(priv); + return rv; + +error: + VIR_FREE(ret.cookie_out.cookie_out_val); + goto done; +} + + +static int +remoteDomainMigratePrepare3(virConnectPtr dconn, + const char *cookiein, + int cookieinlen, + char **cookieout, + int *cookieoutlen, + const char *uri_in, + char **uri_out, + unsigned long flags, + const char *dname, + unsigned long resource, + const char *dom_xml) +{ + int rv = -1; + remote_domain_migrate_prepare3_args args; + remote_domain_migrate_prepare3_ret ret; + struct private_data *priv = dconn->privateData; + + remoteDriverLock(priv); + + memset(&args, 0, sizeof(args)); + memset(&ret, 0, sizeof(ret)); + + args.cookie_in.cookie_in_val = (char *)cookiein; + args.cookie_in.cookie_in_len = cookieinlen; + args.uri_in = uri_in == NULL ? NULL : (char **) &uri_in; + args.flags = flags; + args.dname = dname == NULL ? NULL : (char **) &dname; + args.resource = resource; + args.dom_xml = (char *) dom_xml; + + memset (&ret, 0, sizeof ret); + if (call (dconn, priv, 0, REMOTE_PROC_DOMAIN_MIGRATE_PREPARE3, + (xdrproc_t) xdr_remote_domain_migrate_prepare3_args, (char *) &args, + (xdrproc_t) xdr_remote_domain_migrate_prepare3_ret, (char *) &ret) == -1) + goto done; + + if (ret.cookie_out.cookie_out_len > 0) { + if (!cookieout || !cookieoutlen) { + remoteError(VIR_ERR_INTERNAL_ERROR, "%s", + _("caller ignores cookieout or cookieoutlen")); + goto error; + } + *cookieout = ret.cookie_out.cookie_out_val; /* Caller frees. 
*/ + *cookieoutlen = ret.cookie_out.cookie_out_len; + } + if (ret.uri_out) { + if (!uri_out) { + remoteError(VIR_ERR_INTERNAL_ERROR, "%s", + _("caller ignores uri_out")); + goto error; + } + *uri_out = *ret.uri_out; /* Caller frees. */ + } + + rv = 0; + +done: + remoteDriverUnlock(priv); + return rv; +error: + VIR_FREE(ret.cookie_out.cookie_out_val); + if (ret.uri_out) + VIR_FREE(*ret.uri_out); + goto done; +} + + +static int +remoteDomainMigratePrepareTunnel3(virConnectPtr dconn, + virStreamPtr st, + const char *cookiein, + int cookieinlen, + char **cookieout, + int *cookieoutlen, + unsigned long flags, + const char *dname, + unsigned long resource, + const char *dom_xml) +{ + struct private_data *priv = dconn->privateData; + struct private_stream_data *privst = NULL; + int rv = -1; + remote_domain_migrate_prepare_tunnel3_args args; + remote_domain_migrate_prepare_tunnel3_ret ret; + + remoteDriverLock(priv); + + memset(&args, 0, sizeof(args)); + memset(&ret, 0, sizeof(ret)); + + if (!(privst = remoteStreamOpen(st, 1, + REMOTE_PROC_DOMAIN_MIGRATE_PREPARE_TUNNEL3, + priv->counter))) + goto done; + + st->driver = &remoteStreamDrv; + st->privateData = privst; + + args.cookie_in.cookie_in_val = (char *)cookiein; + args.cookie_in.cookie_in_len = cookieinlen; + args.flags = flags; + args.dname = dname == NULL ? NULL : (char **) &dname; + args.resource = resource; + args.dom_xml = (char *) dom_xml; + + if (call(dconn, priv, 0, REMOTE_PROC_DOMAIN_MIGRATE_PREPARE_TUNNEL3, + (xdrproc_t) xdr_remote_domain_migrate_prepare_tunnel3_args, (char *) &args, + (xdrproc_t) xdr_remote_domain_migrate_prepare_tunnel3_ret, (char *) &ret) == -1) { + remoteStreamRelease(st); + goto done; + } + + if (ret.cookie_out.cookie_out_len > 0) { + if (!cookieout || !cookieoutlen) { + remoteError(VIR_ERR_INTERNAL_ERROR, "%s", + _("caller ignores cookieout or cookieoutlen")); + goto error; + } + *cookieout = ret.cookie_out.cookie_out_val; /* Caller frees. */ + *cookieoutlen = ret.cookie_out.cookie_out_len; + } + + rv = 0; + +done: + remoteDriverUnlock(priv); + return rv; + +error: + VIR_FREE(ret.cookie_out.cookie_out_val); + goto done; +} + + +static int +remoteDomainMigratePerform3(virDomainPtr dom, + const char *cookiein, + int cookieinlen, + char **cookieout, + int *cookieoutlen, + const char *uri, + unsigned long flags, + const char *dname, + unsigned long resource) +{ + int rv = -1; + remote_domain_migrate_perform3_args args; + remote_domain_migrate_perform3_ret ret; + struct private_data *priv = dom->conn->privateData; + + remoteDriverLock(priv); + + memset(&args, 0, sizeof(args)); + memset(&ret, 0, sizeof(ret)); + + make_nonnull_domain(&args.dom, dom); + + args.cookie_in.cookie_in_val = (char *)cookiein; + args.cookie_in.cookie_in_len = cookieinlen; + args.uri = (char *) uri; + args.flags = flags; + args.dname = dname == NULL ? NULL : (char **) &dname; + args.resource = resource; + + if (call (dom->conn, priv, 0, REMOTE_PROC_DOMAIN_MIGRATE_PERFORM3, + (xdrproc_t) xdr_remote_domain_migrate_perform3_args, (char *) &args, + (xdrproc_t) xdr_remote_domain_migrate_perform3_ret, (char *) &ret) == -1) + goto done; + + if (ret.cookie_out.cookie_out_len > 0) { + if (!cookieout || !cookieoutlen) { + remoteError(VIR_ERR_INTERNAL_ERROR, "%s", + _("caller ignores cookieout or cookieoutlen")); + goto error; + } + *cookieout = ret.cookie_out.cookie_out_val; /* Caller frees. 
*/ + *cookieoutlen = ret.cookie_out.cookie_out_len; + } + + rv = 0; + +done: + remoteDriverUnlock(priv); + return rv; + +error: + VIR_FREE(ret.cookie_out.cookie_out_val); + goto done; +} + + +static int +remoteDomainMigrateFinish3(virConnectPtr dconn, + const char *dname, + const char *cookiein, + int cookieinlen, + char **cookieout, + int *cookieoutlen, + const char *uri, + unsigned long flags, + int cancelled, + virDomainPtr *ddom) +{ + remote_domain_migrate_finish3_args args; + remote_domain_migrate_finish3_ret ret; + struct private_data *priv = dconn->privateData; + int rv = -1; + + remoteDriverLock(priv); + + *ddom = NULL; + memset(&args, 0, sizeof(args)); + memset(&ret, 0, sizeof(ret)); + + args.cookie_in.cookie_in_val = (char *)cookiein; + args.cookie_in.cookie_in_len = cookieinlen; + args.dname = (char *) dname; + args.uri = (char *) uri; + args.flags = flags; + args.cancelled = cancelled; + + if (call (dconn, priv, 0, REMOTE_PROC_DOMAIN_MIGRATE_FINISH3, + (xdrproc_t) xdr_remote_domain_migrate_finish3_args, (char *) &args, + (xdrproc_t) xdr_remote_domain_migrate_finish3_ret, (char *) &ret) == -1) + goto done; + + *ddom = get_domain(dconn, ret.ddom); + + if (ret.cookie_out.cookie_out_len > 0) { + if (!cookieout || !cookieoutlen) { + remoteError(VIR_ERR_INTERNAL_ERROR, "%s", + _("caller ignores cookieout or cookieoutlen")); + goto error; + } + *cookieout = ret.cookie_out.cookie_out_val; /* Caller frees. */ + *cookieoutlen = ret.cookie_out.cookie_out_len; + ret.cookie_out.cookie_out_val = NULL; + ret.cookie_out.cookie_out_len = 0; + } + + xdr_free ((xdrproc_t) &xdr_remote_domain_migrate_finish2_ret, (char *) &ret); + + rv = 0; + +done: + remoteDriverUnlock(priv); + return rv; + +error: + VIR_FREE(ret.cookie_out.cookie_out_val); + goto done; +} + + +static int +remoteDomainMigrateConfirm3(virDomainPtr domain, + const char *cookiein, + int cookieinlen, + unsigned long flags, + int cancelled) +{ + int rv = -1; + remote_domain_migrate_confirm3_args args; + struct private_data *priv = domain->conn->privateData; + + remoteDriverLock(priv); + + memset(&args, 0, sizeof(args)); + + make_nonnull_domain (&args.dom, domain); + args.cookie_in.cookie_in_len = cookieinlen; + args.cookie_in.cookie_in_val = (char *) cookiein; + args.flags = flags; + args.cancelled = cancelled; + + if (call (domain->conn, priv, 0, REMOTE_PROC_DOMAIN_MIGRATE_CONFIRM3, + (xdrproc_t) xdr_remote_domain_migrate_confirm3_args, (char *) &args, + (xdrproc_t) xdr_void, (char *) NULL) == -1) + goto done; + + rv = 0; + +done: + remoteDriverUnlock(priv); + return rv; +} + + /*----------------------------------------------------------------------*/ static struct remote_thread_call * @@ -10714,6 +11057,22 @@ remoteDomainEventQueueFlush(int timer ATTRIBUTE_UNUSED, void *opaque) * but if they do then virterror_internal.has been set. */ static virDomainPtr +get_domain (virConnectPtr conn, remote_domain domain) +{ + virDomainPtr dom = NULL; + if (domain) { + dom = virGetDomain (conn, domain->name, BAD_CAST domain->uuid); + if (dom) dom->id = domain->id; + } + return dom; +} + +/* get_nonnull_domain and get_nonnull_network turn an on-wire + * (name, uuid) pair into virDomainPtr or virNetworkPtr object. + * These can return NULL if underlying memory allocations fail, + * but if they do then virterror_internal.has been set. 
+ */ +static virDomainPtr get_nonnull_domain (virConnectPtr conn, remote_nonnull_domain domain) { virDomainPtr dom; @@ -10946,12 +11305,12 @@ static virDriver remote_driver = { remoteDomainSetMemoryParameters, /* domainSetMemoryParameters */ remoteDomainGetMemoryParameters, /* domainGetMemoryParameters */ remoteDomainOpenConsole, /* domainOpenConsole */ - NULL, /* domainMigrateBegin3 */ - NULL, /* domainMigratePrepare3 */ - NULL, /* domainMigratePrepareTunnel3 */ - NULL, /* domainMigratePerform3 */ - NULL, /* domainMigrateFinish3 */ - NULL, /* domainMigrateConfirm3 */ + remoteDomainMigrateBegin3, /* domainMigrateBegin3 */ + remoteDomainMigratePrepare3, /* domainMigratePrepare3 */ + remoteDomainMigratePrepareTunnel3, /* domainMigratePrepareTunnel3 */ + remoteDomainMigratePerform3, /* domainMigratePerform3 */ + remoteDomainMigrateFinish3, /* domainMigrateFinish3 */ + remoteDomainMigrateConfirm3, /* domainMigrateConfirm3 */ }; static virNetworkDriver network_driver = { diff --git a/src/remote/remote_protocol.c b/src/remote/remote_protocol.c index bae92ca..e1df37a 100644 --- a/src/remote/remote_protocol.c +++ b/src/remote/remote_protocol.c @@ -3758,6 +3758,169 @@ xdr_remote_domain_open_console_args (XDR *xdrs, remote_domain_open_console_args } bool_t +xdr_remote_domain_migrate_begin3_args (XDR *xdrs, remote_domain_migrate_begin3_args *objp) +{ + + if (!xdr_remote_nonnull_domain (xdrs, &objp->dom)) + return FALSE; + if (!xdr_uint64_t (xdrs, &objp->flags)) + return FALSE; + if (!xdr_remote_string (xdrs, &objp->dname)) + return FALSE; + if (!xdr_uint64_t (xdrs, &objp->resource)) + return FALSE; + return TRUE; +} + +bool_t +xdr_remote_domain_migrate_begin3_ret (XDR *xdrs, remote_domain_migrate_begin3_ret *objp) +{ + char **objp_cpp0 = (char **) (void *) &objp->cookie_out.cookie_out_val; + + if (!xdr_bytes (xdrs, objp_cpp0, (u_int *) &objp->cookie_out.cookie_out_len, REMOTE_MIGRATE_COOKIE_MAX)) + return FALSE; + if (!xdr_remote_nonnull_string (xdrs, &objp->xml)) + return FALSE; + return TRUE; +} + +bool_t +xdr_remote_domain_migrate_prepare3_args (XDR *xdrs, remote_domain_migrate_prepare3_args *objp) +{ + char **objp_cpp0 = (char **) (void *) &objp->cookie_in.cookie_in_val; + + if (!xdr_bytes (xdrs, objp_cpp0, (u_int *) &objp->cookie_in.cookie_in_len, REMOTE_MIGRATE_COOKIE_MAX)) + return FALSE; + if (!xdr_remote_string (xdrs, &objp->uri_in)) + return FALSE; + if (!xdr_uint64_t (xdrs, &objp->flags)) + return FALSE; + if (!xdr_remote_string (xdrs, &objp->dname)) + return FALSE; + if (!xdr_uint64_t (xdrs, &objp->resource)) + return FALSE; + if (!xdr_remote_nonnull_string (xdrs, &objp->dom_xml)) + return FALSE; + return TRUE; +} + +bool_t +xdr_remote_domain_migrate_prepare3_ret (XDR *xdrs, remote_domain_migrate_prepare3_ret *objp) +{ + char **objp_cpp0 = (char **) (void *) &objp->cookie_out.cookie_out_val; + + if (!xdr_bytes (xdrs, objp_cpp0, (u_int *) &objp->cookie_out.cookie_out_len, REMOTE_MIGRATE_COOKIE_MAX)) + return FALSE; + if (!xdr_remote_string (xdrs, &objp->uri_out)) + return FALSE; + return TRUE; +} + +bool_t +xdr_remote_domain_migrate_prepare_tunnel3_args (XDR *xdrs, remote_domain_migrate_prepare_tunnel3_args *objp) +{ + char **objp_cpp0 = (char **) (void *) &objp->cookie_in.cookie_in_val; + + if (!xdr_bytes (xdrs, objp_cpp0, (u_int *) &objp->cookie_in.cookie_in_len, REMOTE_MIGRATE_COOKIE_MAX)) + return FALSE; + if (!xdr_uint64_t (xdrs, &objp->flags)) + return FALSE; + if (!xdr_remote_string (xdrs, &objp->dname)) + return FALSE; + if (!xdr_uint64_t (xdrs, &objp->resource)) + return FALSE; + 
if (!xdr_remote_nonnull_string (xdrs, &objp->dom_xml)) + return FALSE; + return TRUE; +} + +bool_t +xdr_remote_domain_migrate_prepare_tunnel3_ret (XDR *xdrs, remote_domain_migrate_prepare_tunnel3_ret *objp) +{ + char **objp_cpp0 = (char **) (void *) &objp->cookie_out.cookie_out_val; + + if (!xdr_bytes (xdrs, objp_cpp0, (u_int *) &objp->cookie_out.cookie_out_len, REMOTE_MIGRATE_COOKIE_MAX)) + return FALSE; + return TRUE; +} + +bool_t +xdr_remote_domain_migrate_perform3_args (XDR *xdrs, remote_domain_migrate_perform3_args *objp) +{ + char **objp_cpp0 = (char **) (void *) &objp->cookie_in.cookie_in_val; + + if (!xdr_remote_nonnull_domain (xdrs, &objp->dom)) + return FALSE; + if (!xdr_bytes (xdrs, objp_cpp0, (u_int *) &objp->cookie_in.cookie_in_len, REMOTE_MIGRATE_COOKIE_MAX)) + return FALSE; + if (!xdr_remote_nonnull_string (xdrs, &objp->uri)) + return FALSE; + if (!xdr_uint64_t (xdrs, &objp->flags)) + return FALSE; + if (!xdr_remote_string (xdrs, &objp->dname)) + return FALSE; + if (!xdr_uint64_t (xdrs, &objp->resource)) + return FALSE; + return TRUE; +} + +bool_t +xdr_remote_domain_migrate_perform3_ret (XDR *xdrs, remote_domain_migrate_perform3_ret *objp) +{ + char **objp_cpp0 = (char **) (void *) &objp->cookie_out.cookie_out_val; + + if (!xdr_bytes (xdrs, objp_cpp0, (u_int *) &objp->cookie_out.cookie_out_len, REMOTE_MIGRATE_COOKIE_MAX)) + return FALSE; + return TRUE; +} + +bool_t +xdr_remote_domain_migrate_finish3_args (XDR *xdrs, remote_domain_migrate_finish3_args *objp) +{ + char **objp_cpp0 = (char **) (void *) &objp->cookie_in.cookie_in_val; + + if (!xdr_remote_nonnull_string (xdrs, &objp->dname)) + return FALSE; + if (!xdr_bytes (xdrs, objp_cpp0, (u_int *) &objp->cookie_in.cookie_in_len, REMOTE_MIGRATE_COOKIE_MAX)) + return FALSE; + if (!xdr_remote_nonnull_string (xdrs, &objp->uri)) + return FALSE; + if (!xdr_uint64_t (xdrs, &objp->flags)) + return FALSE; + if (!xdr_int (xdrs, &objp->cancelled)) + return FALSE; + return TRUE; +} + +bool_t +xdr_remote_domain_migrate_finish3_ret (XDR *xdrs, remote_domain_migrate_finish3_ret *objp) +{ + char **objp_cpp0 = (char **) (void *) &objp->cookie_out.cookie_out_val; + + if (!xdr_remote_domain (xdrs, &objp->ddom)) + return FALSE; + if (!xdr_bytes (xdrs, objp_cpp0, (u_int *) &objp->cookie_out.cookie_out_len, REMOTE_MIGRATE_COOKIE_MAX)) + return FALSE; + return TRUE; +} + +bool_t +xdr_remote_domain_migrate_confirm3_args (XDR *xdrs, remote_domain_migrate_confirm3_args *objp) +{ + char **objp_cpp0 = (char **) (void *) &objp->cookie_in.cookie_in_val; + + if (!xdr_remote_nonnull_domain (xdrs, &objp->dom)) + return FALSE; + if (!xdr_bytes (xdrs, objp_cpp0, (u_int *) &objp->cookie_in.cookie_in_len, REMOTE_MIGRATE_COOKIE_MAX)) + return FALSE; + if (!xdr_uint64_t (xdrs, &objp->flags)) + return FALSE; + if (!xdr_int (xdrs, &objp->cancelled)) + return FALSE; + return TRUE; +} + +bool_t xdr_remote_procedure (XDR *xdrs, remote_procedure *objp) { diff --git a/src/remote/remote_protocol.h b/src/remote/remote_protocol.h index 46d526a..016a2bb 100644 --- a/src/remote/remote_protocol.h +++ b/src/remote/remote_protocol.h @@ -2124,6 +2124,118 @@ struct remote_domain_open_console_args { u_int flags; }; typedef struct remote_domain_open_console_args remote_domain_open_console_args; + +struct remote_domain_migrate_begin3_args { + remote_nonnull_domain dom; + uint64_t flags; + remote_string dname; + uint64_t resource; +}; +typedef struct remote_domain_migrate_begin3_args remote_domain_migrate_begin3_args; + +struct remote_domain_migrate_begin3_ret { + struct { + u_int 
cookie_out_len; + char *cookie_out_val; + } cookie_out; + remote_nonnull_string xml; +}; +typedef struct remote_domain_migrate_begin3_ret remote_domain_migrate_begin3_ret; + +struct remote_domain_migrate_prepare3_args { + struct { + u_int cookie_in_len; + char *cookie_in_val; + } cookie_in; + remote_string uri_in; + uint64_t flags; + remote_string dname; + uint64_t resource; + remote_nonnull_string dom_xml; +}; +typedef struct remote_domain_migrate_prepare3_args remote_domain_migrate_prepare3_args; + +struct remote_domain_migrate_prepare3_ret { + struct { + u_int cookie_out_len; + char *cookie_out_val; + } cookie_out; + remote_string uri_out; +}; +typedef struct remote_domain_migrate_prepare3_ret remote_domain_migrate_prepare3_ret; + +struct remote_domain_migrate_prepare_tunnel3_args { + struct { + u_int cookie_in_len; + char *cookie_in_val; + } cookie_in; + uint64_t flags; + remote_string dname; + uint64_t resource; + remote_nonnull_string dom_xml; +}; +typedef struct remote_domain_migrate_prepare_tunnel3_args remote_domain_migrate_prepare_tunnel3_args; + +struct remote_domain_migrate_prepare_tunnel3_ret { + struct { + u_int cookie_out_len; + char *cookie_out_val; + } cookie_out; +}; +typedef struct remote_domain_migrate_prepare_tunnel3_ret remote_domain_migrate_prepare_tunnel3_ret; + +struct remote_domain_migrate_perform3_args { + remote_nonnull_domain dom; + struct { + u_int cookie_in_len; + char *cookie_in_val; + } cookie_in; + remote_nonnull_string uri; + uint64_t flags; + remote_string dname; + uint64_t resource; +}; +typedef struct remote_domain_migrate_perform3_args remote_domain_migrate_perform3_args; + +struct remote_domain_migrate_perform3_ret { + struct { + u_int cookie_out_len; + char *cookie_out_val; + } cookie_out; +}; +typedef struct remote_domain_migrate_perform3_ret remote_domain_migrate_perform3_ret; + +struct remote_domain_migrate_finish3_args { + remote_nonnull_string dname; + struct { + u_int cookie_in_len; + char *cookie_in_val; + } cookie_in; + remote_nonnull_string uri; + uint64_t flags; + int cancelled; +}; +typedef struct remote_domain_migrate_finish3_args remote_domain_migrate_finish3_args; + +struct remote_domain_migrate_finish3_ret { + remote_domain ddom; + struct { + u_int cookie_out_len; + char *cookie_out_val; + } cookie_out; +}; +typedef struct remote_domain_migrate_finish3_ret remote_domain_migrate_finish3_ret; + +struct remote_domain_migrate_confirm3_args { + remote_nonnull_domain dom; + struct { + u_int cookie_in_len; + char *cookie_in_val; + } cookie_in; + uint64_t flags; + int cancelled; +}; +typedef struct remote_domain_migrate_confirm3_args remote_domain_migrate_confirm3_args; #define REMOTE_PROGRAM 0x20008086 #define REMOTE_PROTOCOL_VERSION 1 @@ -2331,6 +2443,12 @@ enum remote_procedure { REMOTE_PROC_DOMAIN_OPEN_CONSOLE = 201, REMOTE_PROC_DOMAIN_IS_UPDATED = 202, REMOTE_PROC_GET_SYSINFO = 203, + REMOTE_PROC_DOMAIN_MIGRATE_BEGIN3 = 204, + REMOTE_PROC_DOMAIN_MIGRATE_PREPARE3 = 205, + REMOTE_PROC_DOMAIN_MIGRATE_PREPARE_TUNNEL3 = 206, + REMOTE_PROC_DOMAIN_MIGRATE_PERFORM3 = 207, + REMOTE_PROC_DOMAIN_MIGRATE_FINISH3 = 208, + REMOTE_PROC_DOMAIN_MIGRATE_CONFIRM3 = 209, }; typedef enum remote_procedure remote_procedure; @@ -2705,6 +2823,17 @@ extern bool_t xdr_remote_domain_snapshot_current_ret (XDR *, remote_domain_snap extern bool_t xdr_remote_domain_revert_to_snapshot_args (XDR *, remote_domain_revert_to_snapshot_args*); extern bool_t xdr_remote_domain_snapshot_delete_args (XDR *, remote_domain_snapshot_delete_args*); extern bool_t 
xdr_remote_domain_open_console_args (XDR *, remote_domain_open_console_args*); +extern bool_t xdr_remote_domain_migrate_begin3_args (XDR *, remote_domain_migrate_begin3_args*); +extern bool_t xdr_remote_domain_migrate_begin3_ret (XDR *, remote_domain_migrate_begin3_ret*); +extern bool_t xdr_remote_domain_migrate_prepare3_args (XDR *, remote_domain_migrate_prepare3_args*); +extern bool_t xdr_remote_domain_migrate_prepare3_ret (XDR *, remote_domain_migrate_prepare3_ret*); +extern bool_t xdr_remote_domain_migrate_prepare_tunnel3_args (XDR *, remote_domain_migrate_prepare_tunnel3_args*); +extern bool_t xdr_remote_domain_migrate_prepare_tunnel3_ret (XDR *, remote_domain_migrate_prepare_tunnel3_ret*); +extern bool_t xdr_remote_domain_migrate_perform3_args (XDR *, remote_domain_migrate_perform3_args*); +extern bool_t xdr_remote_domain_migrate_perform3_ret (XDR *, remote_domain_migrate_perform3_ret*); +extern bool_t xdr_remote_domain_migrate_finish3_args (XDR *, remote_domain_migrate_finish3_args*); +extern bool_t xdr_remote_domain_migrate_finish3_ret (XDR *, remote_domain_migrate_finish3_ret*); +extern bool_t xdr_remote_domain_migrate_confirm3_args (XDR *, remote_domain_migrate_confirm3_args*); extern bool_t xdr_remote_procedure (XDR *, remote_procedure*); extern bool_t xdr_remote_message_type (XDR *, remote_message_type*); extern bool_t xdr_remote_message_status (XDR *, remote_message_status*); @@ -3053,6 +3182,17 @@ extern bool_t xdr_remote_domain_snapshot_current_ret (); extern bool_t xdr_remote_domain_revert_to_snapshot_args (); extern bool_t xdr_remote_domain_snapshot_delete_args (); extern bool_t xdr_remote_domain_open_console_args (); +extern bool_t xdr_remote_domain_migrate_begin3_args (); +extern bool_t xdr_remote_domain_migrate_begin3_ret (); +extern bool_t xdr_remote_domain_migrate_prepare3_args (); +extern bool_t xdr_remote_domain_migrate_prepare3_ret (); +extern bool_t xdr_remote_domain_migrate_prepare_tunnel3_args (); +extern bool_t xdr_remote_domain_migrate_prepare_tunnel3_ret (); +extern bool_t xdr_remote_domain_migrate_perform3_args (); +extern bool_t xdr_remote_domain_migrate_perform3_ret (); +extern bool_t xdr_remote_domain_migrate_finish3_args (); +extern bool_t xdr_remote_domain_migrate_finish3_ret (); +extern bool_t xdr_remote_domain_migrate_confirm3_args (); extern bool_t xdr_remote_procedure (); extern bool_t xdr_remote_message_type (); extern bool_t xdr_remote_message_status (); diff --git a/src/remote/remote_protocol.x b/src/remote/remote_protocol.x index e77aca1..7eb27bd 100644 --- a/src/remote/remote_protocol.x +++ b/src/remote/remote_protocol.x @@ -1874,6 +1874,77 @@ struct remote_domain_open_console_args { unsigned int flags; }; +struct remote_domain_migrate_begin3_args { + remote_nonnull_domain dom; + unsigned hyper flags; + remote_string dname; + unsigned hyper resource; +}; + +struct remote_domain_migrate_begin3_ret { + opaque cookie_out<REMOTE_MIGRATE_COOKIE_MAX>; + remote_nonnull_string xml; +}; + +struct remote_domain_migrate_prepare3_args { + opaque cookie_in<REMOTE_MIGRATE_COOKIE_MAX>; + remote_string uri_in; + unsigned hyper flags; + remote_string dname; + unsigned hyper resource; + remote_nonnull_string dom_xml; +}; + +struct remote_domain_migrate_prepare3_ret { + opaque cookie_out<REMOTE_MIGRATE_COOKIE_MAX>; + remote_string uri_out; +}; + +struct remote_domain_migrate_prepare_tunnel3_args { + opaque cookie_in<REMOTE_MIGRATE_COOKIE_MAX>; + unsigned hyper flags; + remote_string dname; + unsigned hyper resource; + remote_nonnull_string dom_xml; +}; + +struct 
remote_domain_migrate_prepare_tunnel3_ret { + opaque cookie_out<REMOTE_MIGRATE_COOKIE_MAX>; +}; + +struct remote_domain_migrate_perform3_args { + remote_nonnull_domain dom; + opaque cookie_in<REMOTE_MIGRATE_COOKIE_MAX>; + remote_nonnull_string uri; + unsigned hyper flags; + remote_string dname; + unsigned hyper resource; +}; + +struct remote_domain_migrate_perform3_ret { + opaque cookie_out<REMOTE_MIGRATE_COOKIE_MAX>; +}; + +struct remote_domain_migrate_finish3_args { + remote_nonnull_string dname; + opaque cookie_in<REMOTE_MIGRATE_COOKIE_MAX>; + remote_nonnull_string uri; + unsigned hyper flags; + int cancelled; +}; + +struct remote_domain_migrate_finish3_ret { + remote_domain ddom; + opaque cookie_out<REMOTE_MIGRATE_COOKIE_MAX>; +}; + +struct remote_domain_migrate_confirm3_args { + remote_nonnull_domain dom; + opaque cookie_in<REMOTE_MIGRATE_COOKIE_MAX>; + unsigned hyper flags; + int cancelled; +}; + /*----- Protocol. -----*/ /* Define the program number, protocol version and procedure numbers here. */ @@ -2103,7 +2174,13 @@ enum remote_procedure { REMOTE_PROC_DOMAIN_OPEN_CONSOLE = 201, REMOTE_PROC_DOMAIN_IS_UPDATED = 202, - REMOTE_PROC_GET_SYSINFO = 203 + REMOTE_PROC_GET_SYSINFO = 203, + REMOTE_PROC_DOMAIN_MIGRATE_BEGIN3 = 204, + REMOTE_PROC_DOMAIN_MIGRATE_PREPARE3 = 205, + REMOTE_PROC_DOMAIN_MIGRATE_PREPARE_TUNNEL3 = 206, + REMOTE_PROC_DOMAIN_MIGRATE_PERFORM3 = 207, + REMOTE_PROC_DOMAIN_MIGRATE_FINISH3 = 208, + REMOTE_PROC_DOMAIN_MIGRATE_CONFIRM3 = 209 /* * Notice how the entries are grouped in sets of 10 ? -- 1.7.4
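To make the call sequence concrete, here is a hedged sketch of how the five v3 steps chain together on the client side, using the internal signatures visible in the dispatch code above (dom, dconn, uri_in, flags, dname and resource are assumed from context; error handling and the Confirm3-on-failure path are omitted):

    char *cookiein = NULL, *cookieout = NULL;
    int cookieinlen = 0, cookieoutlen = 0;
    char *dom_xml, *uri_out = NULL;

    /* Step 1: Begin3 on the src returns the domain XML plus a cookie */
    dom_xml = virDomainMigrateBegin3(dom, &cookieout, &cookieoutlen,
                                     flags, dname, resource);

    /* each step's output cookie becomes the next step's input cookie */
    cookiein = cookieout; cookieinlen = cookieoutlen;
    cookieout = NULL; cookieoutlen = 0;

    /* Step 2: Prepare3 on the dst (or PrepareTunnel3 when tunnelled) */
    virDomainMigratePrepare3(dconn, cookiein, cookieinlen,
                             &cookieout, &cookieoutlen,
                             uri_in, &uri_out,
                             flags, dname, resource, dom_xml);

    /* Steps 3-5: Perform3 on the src, Finish3 on the dst (returning the
     * new virDomainPtr), then Confirm3 back on the src -- cancelled=0 to
     * kill the src QEMU, cancelled=1 to resume it after a failure */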

On 02/09/2011 09:58 AM, Daniel P. Berrange wrote:
* src/remote/remote_protocol.x: Define wire protocol for migration protocol v3 * daemon/remote.c: Server side dispatch * src/remote/remote_driver.c: Client side serialization * src/remote/remote_protocol.c, src/remote/remote_protocol.h, daemon/remote_dispatch_args.h, daemon/remote_dispatch_prototypes.h, daemon/remote_dispatch_ret.h, daemon/remote_dispatch_table.h: Re-generate files
+ +static int +remoteDispatchDomainMigratePrepare3(struct qemud_server *server ATTRIBUTE_UNUSED, + struct qemud_client *client ATTRIBUTE_UNUSED,
+ + /* Wacky world of XDR ... */ + if (VIR_ALLOC(uri_out) < 0) { + remoteDispatchOOMError(rerr); + return -1; + } + + r = virDomainMigratePrepare3(conn, + args->cookie_in.cookie_in_val, + args->cookie_in.cookie_in_len, + &cookieout, &cookieoutlen, + uri_in, uri_out, + args->flags, dname, args->resource, + args->dom_xml); + if (r == -1) { + remoteDispatchConnError(rerr, conn); + return -1;
This error path leaks uri_out, but the success path is good. ACK with that nit fixed. -- Eric Blake eblake@redhat.com +1-801-349-2682 Libvirt virtualization library http://libvirt.org
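(i.e. something along these lines in the error path -- a sketch of the nit, not the committed fix:)

    if (r == -1) {
        remoteDispatchConnError(rerr, conn);
        VIR_FREE(uri_out);   /* otherwise the XDR allocation leaks */
        return -1;
    }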

The migration protocol has support for a 'cookie' parameter which is an opaque array of bytes as far as libvirt is concerned. Drivers may use this for passing around arbitrary extra data they might need during migration. The QEMU driver needs todo a few things: - Pass hostname/uuid to allow strict protection against localhost migration attempts - Pass SPICE/VNC server port from the target back to the source to allow seemless relocation of client sessions - Pass lock driver state from source to destination * src/libvirt_private.syms: Export virXMLParseStrHelper * src/qemu/qemu_migration.c, src/qemu/qemu_migration.h: Parsing and formatting of migration cookies * src/qemu/qemu_driver.c: Pass in cookie parameters where possible * src/remote/remote_protocol.h, src/remote/remote_protocol.x: Change cookie max length to 16384 bytes --- src/libvirt_private.syms | 1 + src/qemu/qemu_driver.c | 20 +- src/qemu/qemu_migration.c | 600 +++++++++++++++++++++++++++++++++++++++++- src/qemu/qemu_migration.h | 16 ++ src/remote/remote_protocol.h | 2 +- src/remote/remote_protocol.x | 2 +- 6 files changed, 626 insertions(+), 15 deletions(-) diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms index d1e2f4c..38912dc 100644 --- a/src/libvirt_private.syms +++ b/src/libvirt_private.syms @@ -946,6 +946,7 @@ virStrerror; # xml.h +virXMLParseStrHelper; virXMLPropString; virXPathBoolean; virXPathInt; diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c index 82f735a..4f72c07 100644 --- a/src/qemu/qemu_driver.c +++ b/src/qemu/qemu_driver.c @@ -5378,8 +5378,9 @@ qemudDomainMigratePrepareTunnel(virConnectPtr dconn, } qemuDriverLock(driver); - ret = qemuMigrationPrepareTunnel(driver, dconn, st, - dname, dom_xml); + ret = qemuMigrationPrepareTunnel(driver, dconn, + NULL, 0, NULL, NULL, /* No cookies in v2 */ + st, dname, dom_xml); qemuDriverUnlock(driver); cleanup: @@ -5392,8 +5393,8 @@ cleanup: */ static int ATTRIBUTE_NONNULL (5) qemudDomainMigratePrepare2 (virConnectPtr dconn, - char **cookie ATTRIBUTE_UNUSED, - int *cookielen ATTRIBUTE_UNUSED, + char **cookie, + int *cookielen, const char *uri_in, char **uri_out, unsigned long flags, @@ -5432,6 +5433,8 @@ qemudDomainMigratePrepare2 (virConnectPtr dconn, } ret = qemuMigrationPrepareDirect(driver, dconn, + NULL, 0, /* No input cookies in v2 */ + cookie, cookielen, uri_in, uri_out, dname, dom_xml); @@ -5475,8 +5478,9 @@ qemudDomainMigratePerform (virDomainPtr dom, } ret = qemuMigrationPerform(driver, dom->conn, vm, - uri, flags, - dname, resource); + uri, cookie, cookielen, + NULL, NULL, /* No output cookies in v2 */ + flags, dname, resource); cleanup: qemuDriverUnlock(driver); @@ -5519,7 +5523,9 @@ qemudDomainMigrateFinish2 (virConnectPtr dconn, goto cleanup; } - dom = qemuMigrationFinish(driver, dconn, vm, flags, retcode); + dom = qemuMigrationFinish(driver, dconn, vm, + NULL, 0, NULL, NULL, /* No cookies in v2 */ + flags, retcode); cleanup: if (orig_err) { diff --git a/src/qemu/qemu_migration.c b/src/qemu/qemu_migration.c index 8d23cc5..097acaf 100644 --- a/src/qemu/qemu_migration.c +++ b/src/qemu/qemu_migration.c @@ -22,6 +22,8 @@ #include <config.h> #include <sys/time.h> +#include <gnutls/gnutls.h> +#include <gnutls/x509.h> #include "qemu_migration.h" #include "qemu_monitor.h" @@ -37,11 +39,515 @@ #include "files.h" #include "datatypes.h" #include "fdstream.h" +#include "uuid.h" + #define VIR_FROM_THIS VIR_FROM_QEMU #define timeval_to_ms(tv) (((tv).tv_sec * 1000ull) + ((tv).tv_usec / 1000)) +enum qemuMigrationCookieFlags { + 
QEMU_MIGRATION_COOKIE_GRAPHICS = (1 << 0), +}; + +typedef struct _qemuMigrationCookieGraphics qemuMigrationCookieGraphics; +typedef qemuMigrationCookieGraphics *qemuMigrationCookieGraphicsPtr; +struct _qemuMigrationCookieGraphics { + int type; + int port; + int tlsPort; + char *listen; + char *tlsSubject; +}; + +typedef struct _qemuMigrationCookie qemuMigrationCookie; +typedef qemuMigrationCookie *qemuMigrationCookiePtr; +struct _qemuMigrationCookie { + int flags; + + /* Host properties */ + unsigned char hostuuid[VIR_UUID_BUFLEN]; + char *hostname; + + /* Guest properties */ + unsigned char uuid[VIR_UUID_BUFLEN]; + char *name; + + /* If (flags & QEMU_MIGRATION_COOKIE_GRAPHICS) */ + qemuMigrationCookieGraphicsPtr graphics; +}; + +static void qemuMigrationCookieGraphicsFree(qemuMigrationCookieGraphicsPtr grap) +{ + if (!grap) + return; + VIR_FREE(grap->listen); + VIR_FREE(grap->tlsSubject); + VIR_FREE(grap); +} + + +static void qemuMigrationCookieFree(qemuMigrationCookiePtr mig) +{ + if (!mig) + return; + + if (mig->flags & QEMU_MIGRATION_COOKIE_GRAPHICS) + qemuMigrationCookieGraphicsFree(mig->graphics); + + VIR_FREE(mig->hostname); + VIR_FREE(mig->name); + VIR_FREE(mig); +} + + +static char * +qemuDomainExtractTLSSubject(const char *certdir) +{ + char *certfile = NULL; + char *subject = NULL; + char *pemdata = NULL; + gnutls_datum_t pemdatum; + gnutls_x509_crt_t cert; + int ret; + size_t subjectlen; + + if (virAsprintf(&certfile, "%s/server-cert.pem", certdir) < 0) + goto no_memory; + + if (virFileReadAll(certfile, 8192, &pemdata) < 0) { + qemuReportError(VIR_ERR_INTERNAL_ERROR, + _("unable to read server cert %s"), certfile); + goto error; + } + + ret = gnutls_x509_crt_init(&cert); + if (ret < 0) { + qemuReportError(VIR_ERR_INTERNAL_ERROR, + _("cannot initialize cert object: %s"), + gnutls_strerror(ret)); + goto error; + } + + pemdatum.data = (unsigned char *)pemdata; + pemdatum.size = strlen(pemdata); + + ret = gnutls_x509_crt_import(cert, &pemdatum, GNUTLS_X509_FMT_PEM); + if (ret < 0) { + qemuReportError(VIR_ERR_INTERNAL_ERROR, + _("cannot load cert data from %s: %s"), + certfile, gnutls_strerror(ret)); + goto error; + } + + subjectlen = 1024; + if (VIR_ALLOC_N(subject, subjectlen+1) < 0) + goto no_memory; + + gnutls_x509_crt_get_dn(cert, subject, &subjectlen); + subject[subjectlen] = '\0'; + + VIR_FREE(certfile); + VIR_FREE(pemdata); + + return subject; + +no_memory: + virReportOOMError(); +error: + VIR_FREE(certfile); + VIR_FREE(pemdata); + return NULL; +} + + +static qemuMigrationCookieGraphicsPtr +qemuMigrationCookieGraphicsAlloc(struct qemud_driver *driver, + virDomainGraphicsDefPtr def) +{ + qemuMigrationCookieGraphicsPtr mig = NULL; + const char *listenAddr; + + if (VIR_ALLOC(mig) < 0) + goto no_memory; + + mig->type = def->type; + if (mig->type == VIR_DOMAIN_GRAPHICS_TYPE_VNC) { + mig->port = def->data.vnc.port; + listenAddr = def->data.vnc.listenAddr; + if (!listenAddr) + listenAddr = driver->vncListen; + + if (driver->vncTLS && + !(mig->tlsSubject = qemuDomainExtractTLSSubject(driver->vncTLSx509certdir))) + goto error; + } else { + mig->port = def->data.spice.port; + if (driver->spiceTLS) + mig->tlsPort = def->data.spice.tlsPort; + else + mig->tlsPort = -1; + listenAddr = def->data.spice.listenAddr; + if (!listenAddr) + listenAddr = driver->spiceListen; + + if (driver->spiceTLS && + !(mig->tlsSubject = qemuDomainExtractTLSSubject(driver->spiceTLSx509certdir))) + goto error; + } + if (!(mig->listen = strdup(listenAddr))) + goto no_memory; + + return mig; + +no_memory: + 
virReportOOMError(); +error: + qemuMigrationCookieGraphicsFree(mig); + return NULL; +} + + +static qemuMigrationCookiePtr +qemuMigrationCookieNew(virDomainObjPtr dom) +{ + qemuMigrationCookiePtr mig = NULL; + + if (VIR_ALLOC(mig) < 0) + goto no_memory; + + if (!(mig->name = strdup(dom->def->name))) + goto no_memory; + memcpy(mig->uuid, dom->def->uuid, VIR_UUID_BUFLEN); + + if (!(mig->hostname = virGetHostname(NULL))) + goto no_memory; + if (virGetHostUUID(mig->hostuuid) < 0) { + qemuReportError(VIR_ERR_INTERNAL_ERROR, "%s", + _("Unable to obtain host UUID")); + goto error; + } + + return mig; + +no_memory: + virReportOOMError(); +error: + qemuMigrationCookieFree(mig); + return NULL; +} + + +static int +qemuMigrationCookieAddGraphics(qemuMigrationCookiePtr mig, + struct qemud_driver *driver, + virDomainObjPtr dom) +{ + if (mig->flags & QEMU_MIGRATION_COOKIE_GRAPHICS) { + qemuReportError(VIR_ERR_INTERNAL_ERROR, "%s", + _("Migration graphics data already present")); + return -1; + } + + if (dom->def->ngraphics == 1 && + (dom->def->graphics[0]->type == VIR_DOMAIN_GRAPHICS_TYPE_VNC || + dom->def->graphics[0]->type == VIR_DOMAIN_GRAPHICS_TYPE_SPICE) && + !(mig->graphics = qemuMigrationCookieGraphicsAlloc(driver, dom->def->graphics[0]))) + return -1; + + mig->flags |= QEMU_MIGRATION_COOKIE_GRAPHICS; + + return 0; +} + + +static void qemuMigrationCookieGraphicsXMLFormat(virBufferPtr buf, + qemuMigrationCookieGraphicsPtr grap) +{ + virBufferVSprintf(buf, " <graphics type='%s' port='%d' listen='%s'", + virDomainGraphicsTypeToString(grap->type), + grap->port, grap->listen); + if (grap->type == VIR_DOMAIN_GRAPHICS_TYPE_SPICE) + virBufferVSprintf(buf, " tlsPort='%d'", grap->tlsPort); + if (grap->tlsSubject) { + virBufferVSprintf(buf, ">\n"); + virBufferEscapeString(buf, " <cert info='subject' value='%s'/>\n", grap->tlsSubject); + virBufferVSprintf(buf, " </graphics>\n"); + } else { + virBufferVSprintf(buf, "/>\n"); + } +} + + +static void qemuMigrationCookieXMLFormat(virBufferPtr buf, + qemuMigrationCookiePtr mig) +{ + char uuidstr[VIR_UUID_STRING_BUFLEN]; + char hostuuidstr[VIR_UUID_STRING_BUFLEN]; + + virUUIDFormat(mig->uuid, uuidstr); + virUUIDFormat(mig->hostuuid, hostuuidstr); + + virBufferVSprintf(buf, "<qemu-migration>\n"); + virBufferEscapeString(buf, " <name>%s</name>\n", mig->name); + virBufferVSprintf(buf, " <uuid>%s</uuid>\n", uuidstr); + virBufferEscapeString(buf, " <hostname>%s</hostname>\n", mig->hostname); + virBufferVSprintf(buf, " <hostuuid>%s</hostuuid>\n", hostuuidstr); + + if (mig->flags & QEMU_MIGRATION_COOKIE_GRAPHICS) + qemuMigrationCookieGraphicsXMLFormat(buf, mig->graphics); + + virBufferAddLit(buf, "</qemu-migration>\n"); +} + + +static char *qemuMigrationCookieXMLFormatStr(qemuMigrationCookiePtr mig) +{ + virBuffer buf = VIR_BUFFER_INITIALIZER; + + qemuMigrationCookieXMLFormat(&buf, mig); + + if (virBufferError(&buf)) { + virReportOOMError(); + return NULL; + } + + return virBufferContentAndReset(&buf); +} + + +static qemuMigrationCookieGraphicsPtr +qemuMigrationCookieGraphicsXMLParse(xmlXPathContextPtr ctxt) +{ + qemuMigrationCookieGraphicsPtr grap; + long port; + char *tmp; + + if (VIR_ALLOC(grap) < 0) + goto no_memory; + + if (!(tmp = virXPathString("string(./graphics/@type)", ctxt))) { + qemuReportError(VIR_ERR_INTERNAL_ERROR, + "%s", _("missing type attribute in migration data")); + goto error; + } + if ((grap->type = virDomainGraphicsTypeFromString(tmp)) < 0) { + qemuReportError(VIR_ERR_INTERNAL_ERROR, + _("unknown graphics type %s"), tmp); + VIR_FREE(tmp); + goto 
error; + } + if (virXPathLong("string(./graphics/@port)", ctxt, &port) < 0) { + qemuReportError(VIR_ERR_INTERNAL_ERROR, + "%s", _("missing port attribute in migration data")); + goto error; + } + grap->port = (int)port; + if (grap->type == VIR_DOMAIN_GRAPHICS_TYPE_SPICE) { + if (virXPathLong("string(./graphics/@tlsPort)", ctxt, &port) < 0) { + qemuReportError(VIR_ERR_INTERNAL_ERROR, + "%s", _("missing port attribute in migration data")); + goto error; + } + grap->tlsPort = (int)port; + } + if (!(grap->listen = virXPathString("string(./graphics/@listen)", ctxt))) { + qemuReportError(VIR_ERR_INTERNAL_ERROR, + "%s", _("missing listen attribute in migration data")); + goto error; + } + /* Optional */ + grap->tlsSubject = virXPathString("string(./graphics/cert[ info='subject']/@value)", ctxt); + + + return grap; + +no_memory: + virReportOOMError(); +error: + qemuMigrationCookieGraphicsFree(grap); + return NULL; +} + + +static int +qemuMigrationCookieXMLParse(qemuMigrationCookiePtr mig, + xmlXPathContextPtr ctxt, + int flags) +{ + char uuidstr[VIR_UUID_STRING_BUFLEN]; + char *tmp; + + /* We don't store the uuid, name, hostname, or hostuuid + * values. We just compare them to local data todo some + * sanity checking on migration operation + */ + + /* Extract domain name */ + if (!(tmp = virXPathString("string(./name[1])", ctxt))) { + qemuReportError(VIR_ERR_INTERNAL_ERROR, + "%s", _("missing name element in migration data")); + goto error; + } + if (STRNEQ(tmp, mig->name)) { + qemuReportError(VIR_ERR_INTERNAL_ERROR, + _("Incoming cookie data had unexpected name %s vs %s"), + tmp, mig->name); + goto error; + } + VIR_FREE(tmp); + + /* Extract domain uuid */ + tmp = virXPathString("string(./uuid[1])", ctxt); + if (!tmp) { + qemuReportError(VIR_ERR_INTERNAL_ERROR, + "%s", _("missing uuid element in migration data")); + goto error; + } + virUUIDFormat(mig->uuid, uuidstr); + if (STRNEQ(tmp, uuidstr)) { + qemuReportError(VIR_ERR_INTERNAL_ERROR, + _("Incoming cookie data had unexpected UUID %s vs %s"), + tmp, uuidstr); + } + VIR_FREE(tmp); + + /* Check & forbid "localhost" migration */ + if (!(tmp = virXPathString("string(./hostname[1])", ctxt))) { + qemuReportError(VIR_ERR_INTERNAL_ERROR, + "%s", _("missing hostname element in migration data")); + goto error; + } + if (STREQ(tmp, mig->hostname)) { + qemuReportError(VIR_ERR_INTERNAL_ERROR, + _("Attempt to migrate guest to the same host %s"), + tmp); + goto error; + } + VIR_FREE(tmp); + + if (!(tmp = virXPathString("string(./hostuuid[1])", ctxt))) { + qemuReportError(VIR_ERR_INTERNAL_ERROR, + "%s", _("missing hostuuid element in migration data")); + goto error; + } + virUUIDFormat(mig->hostuuid, uuidstr); + if (STREQ(tmp, uuidstr)) { + qemuReportError(VIR_ERR_INTERNAL_ERROR, + _("Attempt to migrate guest to the same host %s"), + tmp); + goto error; + } + VIR_FREE(tmp); + + if ((flags & QEMU_MIGRATION_COOKIE_GRAPHICS) && + virXPathBoolean("count(./graphics) > 0", ctxt) && + (!(mig->graphics = qemuMigrationCookieGraphicsXMLParse(ctxt)))) + goto error; + + return 0; + +error: + VIR_FREE(tmp); + return -1; +} + + +static int +qemuMigrationCookieXMLParseStr(qemuMigrationCookiePtr mig, + const char *xml, + int flags) +{ + xmlDocPtr doc = NULL; + xmlXPathContextPtr ctxt = NULL; + int ret; + + VIR_DEBUG("xml=%s", NULLSTR(xml)); + + if (!(doc = virXMLParseString(xml, "qemumigration.xml"))) + goto cleanup; + + if ((ctxt = xmlXPathNewContext(doc)) == NULL) { + virReportOOMError(); + goto cleanup; + } + + ctxt->node = xmlDocGetRootElement(doc); + + ret = 
qemuMigrationCookieXMLParse(mig, ctxt, flags); + +cleanup: + xmlXPathFreeContext(ctxt); + xmlFreeDoc(doc); + + return ret; +} + + +static int +qemuMigrationBakeCookie(qemuMigrationCookiePtr mig, + struct qemud_driver *driver, + virDomainObjPtr dom, + char **cookieout, + int *cookieoutlen, + int flags) +{ + if (!cookieout || !cookieoutlen) { + qemuReportError(VIR_ERR_INVALID_ARG, "%s", + _("missing migration cookie data")); + return -1; + } + + *cookieoutlen = 0; + + if (flags & QEMU_MIGRATION_COOKIE_GRAPHICS && + qemuMigrationCookieAddGraphics(mig, driver, dom) < 0) + return -1; + + if (!(*cookieout = qemuMigrationCookieXMLFormatStr(mig))) + return -1; + + *cookieoutlen = strlen(*cookieout) + 1; + + VIR_DEBUG("cookielen=%d cookie=%s", *cookieoutlen, *cookieout); + + return 0; +} + + +static qemuMigrationCookiePtr +qemuMigrationEatCookie(virDomainObjPtr dom, + const char *cookiein, + int cookieinlen, + int flags) +{ + qemuMigrationCookiePtr mig = NULL; + + /* Parse & validate incoming cookie (if any) */ + if (cookiein && cookieinlen && + cookiein[cookieinlen-1] != '\0') { + qemuReportError(VIR_ERR_INTERNAL_ERROR, "%s", + _("Migration cookie was not NULL terminated")); + goto error; + } + + VIR_DEBUG("cookielen=%d cookie='%s'", cookieinlen, NULLSTR(cookiein)); + + if (!(mig = qemuMigrationCookieNew(dom))) + return NULL; + + if (cookiein && cookieinlen && + qemuMigrationCookieXMLParseStr(mig, + cookiein, + flags) < 0) + goto error; + + return mig; + +error: + qemuMigrationCookieFree(mig); + return NULL; +} bool qemuMigrationIsAllowed(virDomainDefPtr def) @@ -233,6 +739,10 @@ cleanup: int qemuMigrationPrepareTunnel(struct qemud_driver *driver, virConnectPtr dconn, + const char *cookiein, + int cookieinlen, + char **cookieout, + int *cookieoutlen, virStreamPtr st, const char *dname, const char *dom_xml) @@ -247,6 +757,7 @@ qemuMigrationPrepareTunnel(struct qemud_driver *driver, unsigned long long qemuCmdFlags; qemuDomainObjPrivatePtr priv = NULL; struct timeval now; + qemuMigrationCookiePtr mig = NULL; if (gettimeofday(&now, NULL) < 0) { virReportSystemError(errno, "%s", @@ -285,6 +796,10 @@ qemuMigrationPrepareTunnel(struct qemud_driver *driver, def = NULL; priv = vm->privateData; + if (!(mig = qemuMigrationEatCookie(vm, cookiein, cookieinlen, + QEMU_MIGRATION_COOKIE_GRAPHICS))) + goto cleanup; + if (qemuDomainObjBeginJobWithDriver(driver, vm) < 0) goto cleanup; priv->jobActive = QEMU_JOB_MIGRATION_OUT; @@ -358,6 +873,16 @@ qemuMigrationPrepareTunnel(struct qemud_driver *driver, event = virDomainEventNewFromObj(vm, VIR_DOMAIN_EVENT_STARTED, VIR_DOMAIN_EVENT_STARTED_MIGRATED); + + if (qemuMigrationBakeCookie(mig, driver, vm, cookieout, cookieoutlen, + QEMU_MIGRATION_COOKIE_GRAPHICS) < 0) { + /* We could tear down the whole guest here, but + * cookie data is (so far) non-critical, so that + * seems little harsh. We'll just warn for now.. 
+ */ + VIR_WARN0("Unable to encode migration cookie"); + } + ret = 0; endjob: @@ -386,7 +911,7 @@ cleanup: virDomainObjUnlock(vm); if (event) qemuDomainEventQueue(driver, event); - qemuDriverUnlock(driver); + qemuMigrationCookieFree(mig); return ret; } @@ -394,6 +919,10 @@ cleanup: int qemuMigrationPrepareDirect(struct qemud_driver *driver, virConnectPtr dconn, + const char *cookiein, + int cookieinlen, + char **cookieout, + int *cookieoutlen, const char *uri_in, char **uri_out, const char *dname, @@ -411,6 +940,7 @@ qemuMigrationPrepareDirect(struct qemud_driver *driver, int internalret; qemuDomainObjPrivatePtr priv = NULL; struct timeval now; + qemuMigrationCookiePtr mig = NULL; if (gettimeofday(&now, NULL) < 0) { virReportSystemError(errno, "%s", @@ -523,6 +1053,10 @@ qemuMigrationPrepareDirect(struct qemud_driver *driver, def = NULL; priv = vm->privateData; + if (!(mig = qemuMigrationEatCookie(vm, cookiein, cookieinlen, + QEMU_MIGRATION_COOKIE_GRAPHICS))) + goto cleanup; + if (qemuDomainObjBeginJobWithDriver(driver, vm) < 0) goto cleanup; priv->jobActive = QEMU_JOB_MIGRATION_OUT; @@ -548,6 +1082,15 @@ qemuMigrationPrepareDirect(struct qemud_driver *driver, goto endjob; } + if (qemuMigrationBakeCookie(mig, driver, vm, cookieout, cookieoutlen, + QEMU_MIGRATION_COOKIE_GRAPHICS) < 0) { + /* We could tear down the whole guest here, but + * cookie data is (so far) non-critical, so that + * seems little harsh. We'll just warn for now.. + */ + VIR_WARN0("Unable to encode migration cookie"); + } + qemuDomainStartAudit(vm, "migrated", true); event = virDomainEventNewFromObj(vm, VIR_DOMAIN_EVENT_STARTED, @@ -580,6 +1123,7 @@ cleanup: virDomainObjUnlock(vm); if (event) qemuDomainEventQueue(driver, event); + qemuMigrationCookieFree(mig); return ret; } @@ -590,6 +1134,10 @@ cleanup: static int doNativeMigrate(struct qemud_driver *driver, virDomainObjPtr vm, const char *uri, + const char *cookiein, + int cookieinlen, + char **cookieout, + int *cookieoutlen, unsigned int flags, const char *dname ATTRIBUTE_UNUSED, unsigned long resource) @@ -598,6 +1146,11 @@ static int doNativeMigrate(struct qemud_driver *driver, xmlURIPtr uribits = NULL; qemuDomainObjPrivatePtr priv = vm->privateData; unsigned int background_flags = QEMU_MONITOR_MIGRATE_BACKGROUND; + qemuMigrationCookiePtr mig = NULL; + + if (!(mig = qemuMigrationEatCookie(vm, cookiein, cookieinlen, + QEMU_MIGRATION_COOKIE_GRAPHICS))) + goto cleanup; /* Issue the migrate command. 
*/ if (STRPREFIX(uri, "tcp:") && !STRPREFIX(uri, "tcp://")) { @@ -641,9 +1194,13 @@ static int doNativeMigrate(struct qemud_driver *driver, if (qemuMigrationWaitForCompletion(driver, vm) < 0) goto cleanup; + if (qemuMigrationBakeCookie(mig, driver, vm, cookieout, cookieoutlen, 0) < 0) + VIR_WARN0("Unable to encode migration cookie"); + ret = 0; cleanup: + qemuMigrationCookieFree(mig); xmlFreeURI(uribits); return ret; } @@ -929,14 +1486,16 @@ static int doNonTunnelMigrate(struct qemud_driver *driver, virDomainPtr ddomain = NULL; int retval = -1; char *uri_out = NULL; + char *cookie = NULL; + int cookielen = 0; int rc; qemuDomainObjEnterRemoteWithDriver(driver, vm); /* NB we don't pass 'uri' into this, since that's the libvirtd * URI in this context - so we let dest pick it */ rc = dconn->driver->domainMigratePrepare2(dconn, - NULL, /* cookie */ - 0, /* cookielen */ + &cookie, + &cookielen, NULL, /* uri */ &uri_out, flags, dname, @@ -961,7 +1520,10 @@ static int doNonTunnelMigrate(struct qemud_driver *driver, goto cleanup; } - if (doNativeMigrate(driver, vm, uri_out, flags, dname, resource) < 0) + if (doNativeMigrate(driver, vm, uri_out, + cookie, cookielen, + NULL, NULL, /* No out cookie with v2 migration */ + flags, dname, resource) < 0) goto finish; retval = 0; @@ -970,13 +1532,14 @@ finish: dname = dname ? dname : vm->def->name; qemuDomainObjEnterRemoteWithDriver(driver, vm); ddomain = dconn->driver->domainMigrateFinish2 - (dconn, dname, NULL, 0, uri_out, flags, retval); + (dconn, dname, cookie, cookielen, uri_out, flags, retval); qemuDomainObjExitRemoteWithDriver(driver, vm); if (ddomain) virUnrefDomain(ddomain); cleanup: + VIR_FREE(cookie); return retval; } @@ -1052,6 +1615,10 @@ int qemuMigrationPerform(struct qemud_driver *driver, virConnectPtr conn, virDomainObjPtr vm, const char *uri, + const char *cookiein, + int cookieinlen, + char **cookieout, + int *cookieoutlen, unsigned long flags, const char *dname, unsigned long resource) @@ -1081,11 +1648,19 @@ int qemuMigrationPerform(struct qemud_driver *driver, } if ((flags & (VIR_MIGRATE_TUNNELLED | VIR_MIGRATE_PEER2PEER))) { + if (cookieinlen) { + qemuReportError(VIR_ERR_OPERATION_INVALID, + "%s", _("received unexpected cookie with P2P migration")); + goto endjob; + } + if (doPeer2PeerMigrate(driver, vm, uri, flags, dname, resource) < 0) /* doPeer2PeerMigrate already set the error, so just get out */ goto endjob; } else { - if (doNativeMigrate(driver, vm, uri, flags, dname, resource) < 0) + if (doNativeMigrate(driver, vm, uri, cookiein, cookieinlen, + cookieout, cookieoutlen, + flags, dname, resource) < 0) goto endjob; } @@ -1103,6 +1678,7 @@ int qemuMigrationPerform(struct qemud_driver *driver, virDomainRemoveInactive(&driver->domains, vm); vm = NULL; } + ret = 0; endjob: @@ -1179,6 +1755,10 @@ virDomainPtr qemuMigrationFinish(struct qemud_driver *driver, virConnectPtr dconn, virDomainObjPtr vm, + const char *cookiein, + int cookieinlen, + char **cookieout, + int *cookieoutlen, unsigned long flags, int retcode) { @@ -1186,6 +1766,7 @@ qemuMigrationFinish(struct qemud_driver *driver, virDomainEventPtr event = NULL; int newVM = 1; qemuDomainObjPrivatePtr priv = NULL; + qemuMigrationCookiePtr mig = NULL; priv = vm->privateData; if (priv->jobActive != QEMU_JOB_MIGRATION_IN) { @@ -1196,6 +1777,9 @@ qemuMigrationFinish(struct qemud_driver *driver, priv->jobActive = QEMU_JOB_NONE; memset(&priv->jobInfo, 0, sizeof(priv->jobInfo)); + if (!(mig = qemuMigrationEatCookie(vm, cookiein, cookieinlen, 0))) + goto cleanup; + if 
(qemuDomainObjBeginJobWithDriver(driver, vm) < 0) goto cleanup; @@ -1281,6 +1865,9 @@ qemuMigrationFinish(struct qemud_driver *driver, } } + if (qemuMigrationBakeCookie(mig, driver, vm, cookieout, cookieoutlen, 0) < 0) + VIR_WARN0("Unable to encode migration cookie"); + endjob: if (vm && qemuDomainObjEndJob(vm) == 0) @@ -1291,5 +1878,6 @@ cleanup: virDomainObjUnlock(vm); if (event) qemuDomainEventQueue(driver, event); + qemuMigrationCookieFree(mig); return dom; } diff --git a/src/qemu/qemu_migration.h b/src/qemu/qemu_migration.h index 3cac617..e4e68dc 100644 --- a/src/qemu/qemu_migration.h +++ b/src/qemu/qemu_migration.h @@ -34,12 +34,20 @@ int qemuMigrationWaitForCompletion(struct qemud_driver *driver, virDomainObjPtr int qemuMigrationPrepareTunnel(struct qemud_driver *driver, virConnectPtr dconn, + const char *cookiein, + int cookieinlen, + char **cookieout, + int *cookieoutlen, virStreamPtr st, const char *dname, const char *dom_xml); int qemuMigrationPrepareDirect(struct qemud_driver *driver, virConnectPtr dconn, + const char *cookiein, + int cookieinlen, + char **cookieout, + int *cookieoutlen, const char *uri_in, char **uri_out, const char *dname, @@ -49,6 +57,10 @@ int qemuMigrationPerform(struct qemud_driver *driver, virConnectPtr conn, virDomainObjPtr vm, const char *uri, + const char *cookiein, + int cookieinlen, + char **cookieout, + int *cookieoutlen, unsigned long flags, const char *dname, unsigned long resource); @@ -56,6 +68,10 @@ int qemuMigrationPerform(struct qemud_driver *driver, virDomainPtr qemuMigrationFinish(struct qemud_driver *driver, virConnectPtr dconn, virDomainObjPtr vm, + const char *cookiein, + int cookieinlen, + char **cookieout, + int *cookieoutlen, unsigned long flags, int retcode); diff --git a/src/remote/remote_protocol.h b/src/remote/remote_protocol.h index 016a2bb..37c6efe 100644 --- a/src/remote/remote_protocol.h +++ b/src/remote/remote_protocol.h @@ -43,7 +43,7 @@ typedef remote_nonnull_string *remote_string; #define REMOTE_CPUMAP_MAX 256 #define REMOTE_VCPUINFO_MAX 2048 #define REMOTE_CPUMAPS_MAX 16384 -#define REMOTE_MIGRATE_COOKIE_MAX 256 +#define REMOTE_MIGRATE_COOKIE_MAX 16384 #define REMOTE_NETWORK_NAME_LIST_MAX 256 #define REMOTE_INTERFACE_NAME_LIST_MAX 256 #define REMOTE_DEFINED_INTERFACE_NAME_LIST_MAX 256 diff --git a/src/remote/remote_protocol.x b/src/remote/remote_protocol.x index 7eb27bd..20c671f 100644 --- a/src/remote/remote_protocol.x +++ b/src/remote/remote_protocol.x @@ -99,7 +99,7 @@ const REMOTE_VCPUINFO_MAX = 2048; const REMOTE_CPUMAPS_MAX = 16384; /* Upper limit on migrate cookie. */ -const REMOTE_MIGRATE_COOKIE_MAX = 256; +const REMOTE_MIGRATE_COOKIE_MAX = 16384; /* Upper limit on lists of network names. */ const REMOTE_NETWORK_NAME_LIST_MAX = 256; -- 1.7.4
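[Editor's note: for readers tracing the code above, the cookie is simply a small standalone XML document produced by qemuMigrationCookieXMLFormat and carried as an opaque byte array over the RPC layer — hence the REMOTE_MIGRATE_COOKIE_MAX bump from 256 to 16384 bytes. As a rough illustration, the reply cookie baked by the destination for a SPICE guest would look like this; all values below are hypothetical:

  <qemu-migration>
    <name>demo-guest</name>
    <uuid>8f07aa99-36f9-4d3f-ba64-c41a5c2a5bd8</uuid>
    <hostname>dst.example.com</hostname>
    <hostuuid>0f4868fb-0d55-4b1b-ae0e-3d7f5fd0a1c7</hostuuid>
    <graphics type='spice' port='5901' listen='0.0.0.0' tlsPort='5902'>
      <cert info='subject' value='C=GB,O=Example,CN=dst.example.com'/>
    </graphics>
  </qemu-migration>

On receipt, qemuMigrationEatCookie first checks that the buffer is NUL terminated, then hands off to the XML parser, which rejects the cookie if <hostname> or <hostuuid> matches the local host — that is the strict localhost-migration guard the commit message describes.]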

On 02/09/2011 09:58 AM, Daniel P. Berrange wrote:
The migration protocol has support for a 'cookie' parameter which is an opaque array of bytes as far as libvirt is concerned. Drivers may use this for passing around arbitrary extra data they might need during migration. The QEMU driver needs todo a few things:
s/todo/to do/
- Pass hostname/uuid to allow strict protection against localhost migration attempts - Pass SPICE/VNC server port from the target back to the source to allow seemless relocation of client sessions
s/seemless/seamless/
+static void qemuMigrationCookieGraphicsFree(qemuMigrationCookieGraphicsPtr grap) +{ + if (!grap) + return; + VIR_FREE(grap->listen); + VIR_FREE(grap->tlsSubject); + VIR_FREE(grap); +} + + +static void qemuMigrationCookieFree(qemuMigrationCookiePtr mig)
Should these two be added to cfg.mk's list of free-like functions?
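(If they were added, 'make syntax-check' could then flag redundant NULL guards, since both functions already return early on a NULL argument. A hypothetical caller the check would catch:

    /* redundant: qemuMigrationCookieFree() is already a no-op on NULL */
    if (mig)
        qemuMigrationCookieFree(mig);

    /* equivalent, and what the check would suggest instead */
    qemuMigrationCookieFree(mig);
)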
+ if (virXPathLong("string(./graphics/@port)", ctxt, &port) < 0) { + qemuReportError(VIR_ERR_INTERNAL_ERROR, + "%s", _("missing port attribute in migration data")); + goto error; + } + grap->port = (int)port;
Why not just virXPathInt("...", ctxt, &grap->port) instead of going through the temporary variable?
+ if (grap->type == VIR_DOMAIN_GRAPHICS_TYPE_SPICE) { + if (virXPathLong("string(./graphics/@tlsPort)", ctxt, &port) < 0) { + qemuReportError(VIR_ERR_INTERNAL_ERROR, + "%s", _("missing port attribute in migration data")); + goto error; + } + grap->tlsPort = (int)port;
Likewise.
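Spelled out, the suggested simplification would read something like this (a sketch, assuming virXPathInt keeps its usual (xpath, ctxt, int *) signature; the tlsPort hunk would change the same way):

    if (virXPathInt("string(./graphics/@port)", ctxt, &grap->port) < 0) {
        qemuReportError(VIR_ERR_INTERNAL_ERROR,
                        "%s", _("missing port attribute in migration data"));
        goto error;
    }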
@@ -358,6 +873,16 @@ qemuMigrationPrepareTunnel(struct qemud_driver *driver, event = virDomainEventNewFromObj(vm, VIR_DOMAIN_EVENT_STARTED, VIR_DOMAIN_EVENT_STARTED_MIGRATED); + + if (qemuMigrationBakeCookie(mig, driver, vm, cookieout, cookieoutlen, + QEMU_MIGRATION_COOKIE_GRAPHICS) < 0) { + /* We could tear down the whole guest here, but + * cookie data is (so far) non-critical, so that + * seems little harsh. We'll just warn for now..
s/seems /seems a / (twice) ACK with those nits fixed. -- Eric Blake eblake@redhat.com +1-801-349-2682 Libvirt virtualization library http://libvirt.org

Implement the v3 migration protocol, which has two extra steps, 'begin' on the source host and 'confirm' on the source host. All other methods also gain both input and output cookies to allow bi-directional data passing at all stages * src/qemu/qemu_driver.c: Wire up migrate v3 APIs * src/qemu/qemu_migration.c, src/qemu/qemu_migration.h: Add begin & confirm methods --- src/qemu/qemu_driver.c | 318 +++++++++++++++++++++++++++++++++++++++++++- src/qemu/qemu_migration.c | 141 ++++++++++++++++++-- src/qemu/qemu_migration.h | 16 ++- 3 files changed, 454 insertions(+), 21 deletions(-) diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c index 4f72c07..91caeea 100644 --- a/src/qemu/qemu_driver.c +++ b/src/qemu/qemu_driver.c @@ -866,6 +866,7 @@ qemudSupportsFeature (virConnectPtr conn ATTRIBUTE_UNUSED, int feature) { switch (feature) { case VIR_DRV_FEATURE_MIGRATION_V2: + case VIR_DRV_FEATURE_MIGRATION_V3: case VIR_DRV_FEATURE_MIGRATION_P2P: return 1; default: @@ -5343,7 +5344,9 @@ qemuDomainEventDeregisterAny(virConnectPtr conn, } -/* Migration support. */ +/******************************************************************* + * Migration Protocol Version 2 + *******************************************************************/ /* Prepare is the first step, and it runs on the destination host. * @@ -5361,6 +5364,15 @@ qemudDomainMigratePrepareTunnel(virConnectPtr dconn, struct qemud_driver *driver = dconn->privateData; int ret = -1; + virCheckFlags(VIR_MIGRATE_LIVE | + VIR_MIGRATE_PEER2PEER | + VIR_MIGRATE_TUNNELLED | + VIR_MIGRATE_PERSIST_DEST | + VIR_MIGRATE_UNDEFINE_SOURCE | + VIR_MIGRATE_PAUSED | + VIR_MIGRATE_NON_SHARED_DISK | + VIR_MIGRATE_NON_SHARED_INC, -1); + if (!dom_xml) { qemuReportError(VIR_ERR_INTERNAL_ERROR, "%s", _("no domain XML passed")); @@ -5480,7 +5492,7 @@ qemudDomainMigratePerform (virDomainPtr dom, ret = qemuMigrationPerform(driver, dom->conn, vm, uri, cookie, cookielen, NULL, NULL, /* No output cookies in v2 */ - flags, dname, resource); + flags, dname, resource, true); cleanup: qemuDriverUnlock(driver); @@ -5537,6 +5549,296 @@ cleanup: } +/******************************************************************* + * Migration Protocol Version 3 + *******************************************************************/ + +static char * +qemuDomainMigrateBegin3(virDomainPtr domain, + char **cookieout, + int *cookieoutlen, + unsigned long flags, + const char *dname ATTRIBUTE_UNUSED, + unsigned long resource ATTRIBUTE_UNUSED) +{ + struct qemud_driver *driver = domain->conn->privateData; + virDomainObjPtr vm; + char *xml = NULL; + + virCheckFlags(VIR_MIGRATE_LIVE | + VIR_MIGRATE_PEER2PEER | + VIR_MIGRATE_TUNNELLED | + VIR_MIGRATE_PERSIST_DEST | + VIR_MIGRATE_UNDEFINE_SOURCE | + VIR_MIGRATE_PAUSED | + VIR_MIGRATE_NON_SHARED_DISK | + VIR_MIGRATE_NON_SHARED_INC, NULL); + + qemuDriverLock(driver); + vm = virDomainFindByUUID(&driver->domains, domain->uuid); + if (!vm) { + char uuidstr[VIR_UUID_STRING_BUFLEN]; + virUUIDFormat(domain->uuid, uuidstr); + qemuReportError(VIR_ERR_NO_DOMAIN, + _("no domain with matching uuid '%s'"), uuidstr); + goto cleanup; + } + + xml = qemuMigrationBegin(driver, vm, + cookieout, cookieoutlen); + +cleanup: + qemuDriverUnlock(driver); + return xml; +} + +static int +qemuDomainMigratePrepare3(virConnectPtr dconn, + const char *cookiein, + int cookieinlen, + char **cookieout, + int *cookieoutlen, + const char *uri_in, + char **uri_out, + unsigned long flags, + const char *dname, + unsigned long resource ATTRIBUTE_UNUSED, + const char *dom_xml) +{ + struct 
qemud_driver *driver = dconn->privateData; + int ret = -1; + + virCheckFlags(VIR_MIGRATE_LIVE | + VIR_MIGRATE_PEER2PEER | + VIR_MIGRATE_TUNNELLED | + VIR_MIGRATE_PERSIST_DEST | + VIR_MIGRATE_UNDEFINE_SOURCE | + VIR_MIGRATE_PAUSED | + VIR_MIGRATE_NON_SHARED_DISK | + VIR_MIGRATE_NON_SHARED_INC, -1); + + *uri_out = NULL; + + qemuDriverLock(driver); + if (flags & VIR_MIGRATE_TUNNELLED) { + /* this is a logical error; we never should have gotten here with + * VIR_MIGRATE_TUNNELLED set + */ + qemuReportError(VIR_ERR_INTERNAL_ERROR, + "%s", _("Tunnelled migration requested but invalid RPC method called")); + goto cleanup; + } + + if (!dom_xml) { + qemuReportError(VIR_ERR_INTERNAL_ERROR, + "%s", _("no domain XML passed")); + goto cleanup; + } + + ret = qemuMigrationPrepareDirect(driver, dconn, + cookiein, cookieinlen, + cookieout, cookieoutlen, + uri_in, uri_out, + dname, dom_xml); + +cleanup: + qemuDriverUnlock(driver); + return ret; +} + + +static int +qemuDomainMigratePrepareTunnel3(virConnectPtr dconn, + virStreamPtr st, + const char *cookiein, + int cookieinlen, + char **cookieout, + int *cookieoutlen, + unsigned long flags, + const char *dname, + unsigned long resource ATTRIBUTE_UNUSED, + const char *dom_xml) +{ + struct qemud_driver *driver = dconn->privateData; + int ret = -1; + + virCheckFlags(VIR_MIGRATE_LIVE | + VIR_MIGRATE_PEER2PEER | + VIR_MIGRATE_TUNNELLED | + VIR_MIGRATE_PERSIST_DEST | + VIR_MIGRATE_UNDEFINE_SOURCE | + VIR_MIGRATE_PAUSED | + VIR_MIGRATE_NON_SHARED_DISK | + VIR_MIGRATE_NON_SHARED_INC, -1); + + if (!dom_xml) { + qemuReportError(VIR_ERR_INTERNAL_ERROR, + "%s", _("no domain XML passed")); + goto cleanup; + } + if (!(flags & VIR_MIGRATE_TUNNELLED)) { + qemuReportError(VIR_ERR_INTERNAL_ERROR, + "%s", _("PrepareTunnel called but no TUNNELLED flag set")); + goto cleanup; + } + if (st == NULL) { + qemuReportError(VIR_ERR_INTERNAL_ERROR, + "%s", _("tunnelled migration requested but NULL stream passed")); + goto cleanup; + } + + qemuDriverLock(driver); + ret = qemuMigrationPrepareTunnel(driver, dconn, + cookiein, cookieinlen, + cookieout, cookieoutlen, + st, dname, dom_xml); + qemuDriverUnlock(driver); + +cleanup: + return ret; +} + + +static int +qemuDomainMigratePerform3(virDomainPtr dom, + const char *cookiein, + int cookieinlen, + char **cookieout, + int *cookieoutlen, + const char *uri, + unsigned long flags, + const char *dname, + unsigned long resource) +{ + struct qemud_driver *driver = dom->conn->privateData; + virDomainObjPtr vm; + int ret = -1; + + virCheckFlags(VIR_MIGRATE_LIVE | + VIR_MIGRATE_PEER2PEER | + VIR_MIGRATE_TUNNELLED | + VIR_MIGRATE_PERSIST_DEST | + VIR_MIGRATE_UNDEFINE_SOURCE | + VIR_MIGRATE_PAUSED | + VIR_MIGRATE_NON_SHARED_DISK | + VIR_MIGRATE_NON_SHARED_INC, -1); + + qemuDriverLock(driver); + vm = virDomainFindByUUID(&driver->domains, dom->uuid); + if (!vm) { + char uuidstr[VIR_UUID_STRING_BUFLEN]; + virUUIDFormat(dom->uuid, uuidstr); + qemuReportError(VIR_ERR_NO_DOMAIN, + _("no domain with matching uuid '%s'"), uuidstr); + goto cleanup; + } + + ret = qemuMigrationPerform(driver, dom->conn, vm, + uri, cookiein, cookieinlen, + cookieout, cookieoutlen, + flags, dname, resource, false); + +cleanup: + qemuDriverUnlock(driver); + return ret; +} + + +static int +qemuDomainMigrateFinish3(virConnectPtr dconn, + const char *dname, + const char *cookiein, + int cookieinlen, + char **cookieout, + int *cookieoutlen, + const char *uri ATTRIBUTE_UNUSED, + unsigned long flags, + int cancelled, + virDomainPtr *newdom) +{ + struct qemud_driver *driver = 
dconn->privateData; + virDomainObjPtr vm; + virErrorPtr orig_err; + int ret = -1; + + virCheckFlags(VIR_MIGRATE_LIVE | + VIR_MIGRATE_PEER2PEER | + VIR_MIGRATE_TUNNELLED | + VIR_MIGRATE_PERSIST_DEST | + VIR_MIGRATE_UNDEFINE_SOURCE | + VIR_MIGRATE_PAUSED | + VIR_MIGRATE_NON_SHARED_DISK | + VIR_MIGRATE_NON_SHARED_INC, -1); + + /* Migration failed. Save the current error so nothing squashes it */ + orig_err = virSaveLastError(); + + qemuDriverLock(driver); + vm = virDomainFindByName(&driver->domains, dname); + if (!vm) { + qemuReportError(VIR_ERR_NO_DOMAIN, + _("no domain with matching name '%s'"), dname); + goto cleanup; + } + + *newdom = qemuMigrationFinish(driver, dconn, vm, + cookiein, cookieinlen, + cookieout, cookieoutlen, + flags, cancelled); + + ret = 0; + +cleanup: + if (orig_err) { + virSetError(orig_err); + virFreeError(orig_err); + } + qemuDriverUnlock(driver); + return ret; +} + +static int +qemuDomainMigrateConfirm3(virDomainPtr domain, + const char *cookiein, + int cookieinlen, + unsigned long flags, + int cancelled) +{ + struct qemud_driver *driver = domain->conn->privateData; + virDomainObjPtr vm; + int ret = -1; + + virCheckFlags(VIR_MIGRATE_LIVE | + VIR_MIGRATE_PEER2PEER | + VIR_MIGRATE_TUNNELLED | + VIR_MIGRATE_PERSIST_DEST | + VIR_MIGRATE_UNDEFINE_SOURCE | + VIR_MIGRATE_PAUSED | + VIR_MIGRATE_NON_SHARED_DISK | + VIR_MIGRATE_NON_SHARED_INC, -1); + + /* Migration failed. Save the current error so nothing squashes it */ + + qemuDriverLock(driver); + vm = virDomainFindByUUID(&driver->domains, domain->uuid); + if (!vm) { + char uuidstr[VIR_UUID_STRING_BUFLEN]; + virUUIDFormat(domain->uuid, uuidstr); + qemuReportError(VIR_ERR_NO_DOMAIN, + _("no domain with matching uuid '%s'"), uuidstr); + goto cleanup; + } + + ret = qemuMigrationConfirm(driver, domain->conn, vm, + cookiein, cookieinlen, + flags, cancelled); + +cleanup: + qemuDriverUnlock(driver); + return ret; +} + + static int qemudNodeDeviceGetPciInfo (virNodeDevicePtr dev, unsigned *domain, @@ -6860,12 +7162,12 @@ static virDriver qemuDriver = { qemuDomainSetMemoryParameters, /* domainSetMemoryParameters */ qemuDomainGetMemoryParameters, /* domainGetMemoryParameters */ qemuDomainOpenConsole, /* domainOpenConsole */ - NULL, /* domainMigrateBegin3 */ - NULL, /* domainMigratePrepare3 */ - NULL, /* domainMigratePrepareTunnel3 */ - NULL, /* domainMigratePerform3 */ - NULL, /* domainMigrateFinish3 */ - NULL, /* domainMigrateConfirm3 */ + qemuDomainMigrateBegin3, /* domainMigrateBegin3 */ + qemuDomainMigratePrepare3, /* domainMigratePrepare3 */ + qemuDomainMigratePrepareTunnel3, /* domainMigratePrepareTunnel3 */ + qemuDomainMigratePerform3, /* domainMigratePerform3 */ + qemuDomainMigrateFinish3, /* domainMigrateFinish3 */ + qemuDomainMigrateConfirm3, /* domainMigrateConfirm3 */ }; diff --git a/src/qemu/qemu_migration.c b/src/qemu/qemu_migration.c index 097acaf..4901918 100644 --- a/src/qemu/qemu_migration.c +++ b/src/qemu/qemu_migration.c @@ -731,6 +731,42 @@ cleanup: } +char *qemuMigrationBegin(struct qemud_driver *driver, + virDomainObjPtr vm, + char **cookieout, + int *cookieoutlen) +{ + char *rv = NULL; + qemuMigrationCookiePtr mig = NULL; + + if (!virDomainObjIsActive(vm)) { + qemuReportError(VIR_ERR_OPERATION_INVALID, + "%s", _("domain is not running")); + goto cleanup; + } + + if (!qemuMigrationIsAllowed(vm->def)) + goto cleanup; + + if (!(mig = qemuMigrationEatCookie(vm, NULL, 0, 0))) + goto cleanup; + + if (qemuMigrationBakeCookie(mig, driver, vm, + cookieout, cookieoutlen, + 0) < 0) + goto cleanup; + + rv = 
qemuDomainFormatXML(driver, vm, + VIR_DOMAIN_XML_SECURE | + VIR_DOMAIN_XML_UPDATE_CPU); + +cleanup: + virDomainObjUnlock(vm); + qemuMigrationCookieFree(mig); + return rv; +} + + /* Prepare is the first step, and it runs on the destination host. * * This version starts an empty VM listening on a localhost TCP port, and @@ -1621,7 +1657,8 @@ int qemuMigrationPerform(struct qemud_driver *driver, int *cookieoutlen, unsigned long flags, const char *dname, - unsigned long resource) + unsigned long resource, + bool killOnFinish) { virDomainEventPtr event = NULL; int ret = -1; @@ -1665,18 +1702,20 @@ int qemuMigrationPerform(struct qemud_driver *driver, } /* Clean up the source domain. */ - qemuProcessStop(driver, vm, 1); - qemuDomainStopAudit(vm, "migrated"); - resume = 0; + if (killOnFinish) { + qemuProcessStop(driver, vm, 1); + qemuDomainStopAudit(vm, "migrated"); + resume = 0; - event = virDomainEventNewFromObj(vm, - VIR_DOMAIN_EVENT_STOPPED, - VIR_DOMAIN_EVENT_STOPPED_MIGRATED); - if (!vm->persistent || (flags & VIR_MIGRATE_UNDEFINE_SOURCE)) { - virDomainDeleteConfig(driver->configDir, driver->autostartDir, vm); - if (qemuDomainObjEndJob(vm) > 0) - virDomainRemoveInactive(&driver->domains, vm); - vm = NULL; + event = virDomainEventNewFromObj(vm, + VIR_DOMAIN_EVENT_STOPPED, + VIR_DOMAIN_EVENT_STOPPED_MIGRATED); + if (!vm->persistent || (flags & VIR_MIGRATE_UNDEFINE_SOURCE)) { + virDomainDeleteConfig(driver->configDir, driver->autostartDir, vm); + if (qemuDomainObjEndJob(vm) > 0) + virDomainRemoveInactive(&driver->domains, vm); + vm = NULL; + } } ret = 0; @@ -1881,3 +1920,81 @@ cleanup: qemuMigrationCookieFree(mig); return dom; } + +int qemuMigrationConfirm(struct qemud_driver *driver, + virConnectPtr conn, + virDomainObjPtr vm, + const char *cookiein, + int cookieinlen, + unsigned int flags, + int retcode) +{ + qemuMigrationCookiePtr mig; + virDomainEventPtr event = NULL; + int rv = -1; + + if (!(mig = qemuMigrationEatCookie(vm, cookiein, cookieinlen, 0))) + return -1; + + if (qemuDomainObjBeginJobWithDriver(driver, vm) < 0) + goto cleanup; + + if (!virDomainObjIsActive(vm)) { + qemuReportError(VIR_ERR_INTERNAL_ERROR, "%s", + _("guest unexpectedly quit")); + goto endjob; + } + + /* Did the migration go as planned? If yes, kill off the + * domain object, but if no, resume CPUs + */ + if (retcode == 0) { + qemuProcessStop(driver, vm, 1); + qemuDomainStopAudit(vm, "migrated"); + + event = virDomainEventNewFromObj(vm, + VIR_DOMAIN_EVENT_STOPPED, + VIR_DOMAIN_EVENT_STOPPED_MIGRATED); + if (!vm->persistent || (flags & VIR_MIGRATE_UNDEFINE_SOURCE)) { + virDomainDeleteConfig(driver->configDir, driver->autostartDir, vm); + if (qemuDomainObjEndJob(vm) > 0) + virDomainRemoveInactive(&driver->domains, vm); + vm = NULL; + } + } else { + + /* run 'cont' on the destination, which allows migration on qemu + * >= 0.10.6 to work properly. 
This isn't strictly necessary on + * older qemu's, but it also doesn't hurt anything there + */ + if (qemuProcessStartCPUs(driver, vm, conn) < 0) { + if (virGetLastError() == NULL) + qemuReportError(VIR_ERR_INTERNAL_ERROR, + "%s", _("resume operation failed")); + goto endjob; + } + + event = virDomainEventNewFromObj(vm, + VIR_DOMAIN_EVENT_RESUMED, + VIR_DOMAIN_EVENT_RESUMED_MIGRATED); + if (virDomainSaveStatus(driver->caps, driver->stateDir, vm) < 0) { + VIR_WARN("Failed to save status on vm %s", vm->def->name); + goto endjob; + } + } + + qemuMigrationCookieFree(mig); + rv = 0; + +endjob: + if (vm && + qemuDomainObjEndJob(vm) == 0) + vm = NULL; + +cleanup: + if (vm) + virDomainObjUnlock(vm); + if (event) + qemuDomainEventQueue(driver, event); + return rv; +} diff --git a/src/qemu/qemu_migration.h b/src/qemu/qemu_migration.h index e4e68dc..3c7bf62 100644 --- a/src/qemu/qemu_migration.h +++ b/src/qemu/qemu_migration.h @@ -32,6 +32,11 @@ int qemuMigrationSetOffline(struct qemud_driver *driver, int qemuMigrationWaitForCompletion(struct qemud_driver *driver, virDomainObjPtr vm); +char *qemuMigrationBegin(struct qemud_driver *driver, + virDomainObjPtr vm, + char **cookieout, + int *cookieoutlen); + int qemuMigrationPrepareTunnel(struct qemud_driver *driver, virConnectPtr dconn, const char *cookiein, @@ -63,7 +68,8 @@ int qemuMigrationPerform(struct qemud_driver *driver, int *cookieoutlen, unsigned long flags, const char *dname, - unsigned long resource); + unsigned long resource, + bool killOnFinish); virDomainPtr qemuMigrationFinish(struct qemud_driver *driver, virConnectPtr dconn, @@ -75,5 +81,13 @@ virDomainPtr qemuMigrationFinish(struct qemud_driver *driver, unsigned long flags, int retcode); +int qemuMigrationConfirm(struct qemud_driver *driver, + virConnectPtr conn, + virDomainObjPtr vm, + const char *cookiein, + int cookieinlen, + unsigned int flags, + int retcode); + #endif /* __QEMU_MIGRATION_H__ */ -- 1.7.4
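[Editor's note: to make the new control flow easier to follow, here is a rough sketch of how the five v3 phases chain their cookies, using the signatures from qemu_migration.h above. It is illustrative only: in reality Begin/Perform/Confirm run on the source host and Prepare/Finish on the destination, connected by RPC, and all locking, error handling, and the vm/dstvm/dname/flags plumbing are omitted or assumed here.

    /* Source: Begin emits the domain XML plus the first cookie */
    char *cookie1 = NULL; int cookie1len = 0;
    char *xml = qemuMigrationBegin(driver, vm, &cookie1, &cookie1len);

    /* Destination: Prepare eats cookie1, bakes cookie2 (graphics data) */
    char *cookie2 = NULL; int cookie2len = 0;
    char *uri_out = NULL;
    qemuMigrationPrepareDirect(driver, dconn, cookie1, cookie1len,
                               &cookie2, &cookie2len,
                               NULL, &uri_out, dname, xml);

    /* Source: Perform eats cookie2, bakes cookie3; killOnFinish=false
     * keeps the source VM around until Confirm */
    char *cookie3 = NULL; int cookie3len = 0;
    qemuMigrationPerform(driver, conn, vm, uri_out, cookie2, cookie2len,
                         &cookie3, &cookie3len, flags, dname, resource,
                         false);

    /* Destination: Finish eats cookie3, bakes cookie4 */
    char *cookie4 = NULL; int cookie4len = 0;
    virDomainPtr ddom = qemuMigrationFinish(driver, dconn, dstvm,
                                            cookie3, cookie3len,
                                            &cookie4, &cookie4len,
                                            flags, retcode);

    /* Source: Confirm eats cookie4 and either stops the source VM
     * (retcode == 0) or resumes its CPUs (retcode != 0) */
    qemuMigrationConfirm(driver, conn, vm, cookie4, cookie4len,
                         flags, retcode);

Each side alternately eats the peer's cookie and bakes a fresh one for the next phase, which is what makes bi-directional data passing possible at every stage.]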

On 02/09/2011 09:58 AM, Daniel P. Berrange wrote:
Implement the v3 migration protocol, which has two extra steps, 'begin' on the source host and 'confirm' on the source host. All other methods also gain both input and output cookies to allow bi-directional data passing at all stages
* src/qemu/qemu_driver.c: Wire up migrate v3 APIs * src/qemu/qemu_migration.c, src/qemu/qemu_migration.h: Add begin & confirm methods --- src/qemu/qemu_driver.c | 318 +++++++++++++++++++++++++++++++++++++++++++- src/qemu/qemu_migration.c | 141 ++++++++++++++++++-- src/qemu/qemu_migration.h | 16 ++-
ACK. -- Eric Blake eblake@redhat.com +1-801-349-2682 Libvirt virtualization library http://libvirt.org