[libvirt] [PATCH 0/4] Pre-create storage on live migration

Currently users are required to pre-create storage on their own upon migration. This patch set implements the feature for RAW, QCOW and QCOW2 images. While for the RAW fallocate() is used (so we are guaranteed subsequent write() won't fail with ENOSPC, for the latter two we have no other option than using qemu-img, which uses ftruncate(). On the other hand, this is not such big deal. If the spare runs out as a domain is migrating, the migration is aborted, and pre-created storage unlink()-ed. There's no difference to what will happen if user will pre-create storage by hand. https://bugzilla.redhat.com/show_bug.cgi?id=927252 Michal Privoznik (4): qemu: Expose file opening functions qemu_domain: Introduce qemuDomainGetDiskBlockInfo qemu_migration: Check size prerequisites qemu_migration: Unlink pre-created storage on error src/qemu/qemu_domain.c | 299 ++++++++++++++++++++++++++++++++++++++++++++- src/qemu/qemu_domain.h | 16 +++ src/qemu/qemu_driver.c | 276 ++--------------------------------------- src/qemu/qemu_migration.c | 304 +++++++++++++++++++++++++++++++++++++++++++++- src/qemu/qemu_process.c | 8 ++ 5 files changed, 627 insertions(+), 276 deletions(-) -- 1.8.1.5

The qemuOpenFileAs and qemuOpenFile functions are gonna be needed in later patches. However, they are static now. Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/qemu/qemu_domain.c | 146 ++++++++++++++++++++++++++++++++++++++++++++++ src/qemu/qemu_domain.h | 10 ++++ src/qemu/qemu_driver.c | 155 ------------------------------------------------- 3 files changed, 156 insertions(+), 155 deletions(-) diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c index f2cca70..b5770c5 100644 --- a/src/qemu/qemu_domain.c +++ b/src/qemu/qemu_domain.c @@ -39,6 +39,7 @@ #include "virstoragefile.h" #include "virstring.h" +#include <sys/stat.h> #include <sys/time.h> #include <fcntl.h> @@ -2355,3 +2356,148 @@ qemuDomainUpdateDeviceList(virQEMUDriverPtr driver, priv->qemuDevices = aliases; return 0; } + +/* Internal function to properly create or open existing files, with + * ownership affected by qemu driver setup and domain DAC label. */ +int +qemuOpenFile(virQEMUDriverPtr driver, + virDomainObjPtr vm, + const char *path, int oflags, + bool *needUnlink, bool *bypassSecurityDriver) +{ + int ret = -1; + virQEMUDriverConfigPtr cfg = virQEMUDriverGetConfig(driver); + uid_t user = cfg->user; + gid_t group = cfg->group; + bool dynamicOwnership = cfg->dynamicOwnership; + virSecurityLabelDefPtr seclabel; + + virObjectUnref(cfg); + + /* TODO: Take imagelabel into account? */ + if (vm && + (seclabel = virDomainDefGetSecurityLabelDef(vm->def, "dac")) != NULL && + (virParseOwnershipIds(seclabel->label, &user, &group) < 0)) + goto cleanup; + + ret = qemuOpenFileAs(user, group, dynamicOwnership, + path, oflags, needUnlink, bypassSecurityDriver); + + cleanup: + return ret; +} + +int +qemuOpenFileAs(uid_t fallback_uid, gid_t fallback_gid, + bool dynamicOwnership, + const char *path, int oflags, + bool *needUnlink, bool *bypassSecurityDriver) +{ + struct stat sb; + bool is_reg = true; + bool need_unlink = false; + bool bypass_security = false; + unsigned int vfoflags = 0; + int fd = -1; + int path_shared = virStorageFileIsSharedFS(path); + uid_t uid = getuid(); + gid_t gid = getgid(); + + /* path might be a pre-existing block dev, in which case + * we need to skip the create step, and also avoid unlink + * in the failure case */ + if (oflags & O_CREAT) { + need_unlink = true; + + /* Don't force chown on network-shared FS + * as it is likely to fail. */ + if (path_shared <= 0 || dynamicOwnership) + vfoflags |= VIR_FILE_OPEN_FORCE_OWNER; + + if (stat(path, &sb) == 0) { + is_reg = !!S_ISREG(sb.st_mode); + /* If the path is regular file which exists + * already and dynamic_ownership is off, we don't + * want to change it's ownership, just open it as-is */ + if (is_reg && !dynamicOwnership) { + uid = sb.st_uid; + gid = sb.st_gid; + } + } + } + + /* First try creating the file as root */ + if (!is_reg) { + if ((fd = open(path, oflags & ~O_CREAT)) < 0) { + fd = -errno; + goto error; + } + } else { + if ((fd = virFileOpenAs(path, oflags, S_IRUSR | S_IWUSR, uid, gid, + vfoflags | VIR_FILE_OPEN_NOFORK)) < 0) { + /* If we failed as root, and the error was permission-denied + (EACCES or EPERM), assume it's on a network-connected share + where root access is restricted (eg, root-squashed NFS). If the + qemu user is non-root, just set a flag to + bypass security driver shenanigans, and retry the operation + after doing setuid to qemu user */ + if ((fd != -EACCES && fd != -EPERM) || fallback_uid == getuid()) + goto error; + + /* On Linux we can also verify the FS-type of the directory. */ + switch (path_shared) { + case 1: + /* it was on a network share, so we'll continue + * as outlined above + */ + break; + + case -1: + virReportSystemError(-fd, oflags & O_CREAT + ? _("Failed to create file " + "'%s': couldn't determine fs type") + : _("Failed to open file " + "'%s': couldn't determine fs type"), + path); + goto cleanup; + + case 0: + default: + /* local file - log the error returned by virFileOpenAs */ + goto error; + } + + /* Retry creating the file as qemu user */ + + if ((fd = virFileOpenAs(path, oflags, + S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP, + fallback_uid, fallback_gid, + vfoflags | VIR_FILE_OPEN_FORK)) < 0) { + virReportSystemError(-fd, oflags & O_CREAT + ? _("Error from child process creating '%s'") + : _("Error from child process opening '%s'"), + path); + goto cleanup; + } + + /* Since we had to setuid to create the file, and the fstype + is NFS, we assume it's a root-squashing NFS share, and that + the security driver stuff would have failed anyway */ + + bypass_security = true; + } + } +cleanup: + if (needUnlink) + *needUnlink = need_unlink; + if (bypassSecurityDriver) + *bypassSecurityDriver = bypass_security; + return fd; + +error: + virReportSystemError(-fd, oflags & O_CREAT + ? _("Failed to create file '%s'") + : _("Failed to open file '%s'"), + path); + goto cleanup; +} diff --git a/src/qemu/qemu_domain.h b/src/qemu/qemu_domain.h index 21f116c..50987a4 100644 --- a/src/qemu/qemu_domain.h +++ b/src/qemu/qemu_domain.h @@ -368,4 +368,14 @@ extern virDomainDefParserConfig virQEMUDriverDomainDefParserConfig; int qemuDomainUpdateDeviceList(virQEMUDriverPtr driver, virDomainObjPtr vm); +int qemuOpenFile(virQEMUDriverPtr driver, + virDomainObjPtr vm, + const char *path, int oflags, + bool *needUnlink, bool *bypassSecurityDriver); + +int qemuOpenFileAs(uid_t fallback_uid, gid_t fallback_gid, + bool dynamicOwnership, + const char *path, int oflags, + bool *needUnlink, bool *bypassSecurityDriver); + #endif /* __QEMU_DOMAIN_H__ */ diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c index bbf2d23..5f5d0f1 100644 --- a/src/qemu/qemu_driver.c +++ b/src/qemu/qemu_driver.c @@ -157,16 +157,6 @@ static int qemuDomainGetMaxVcpus(virDomainPtr dom); static int qemuDomainManagedSaveLoad(virDomainObjPtr vm, void *opaque); -static int qemuOpenFile(virQEMUDriverPtr driver, - virDomainObjPtr vm, - const char *path, int oflags, - bool *needUnlink, bool *bypassSecurityDriver); - -static int qemuOpenFileAs(uid_t fallback_uid, gid_t fallback_gid, - bool dynamicOwnership, - const char *path, int oflags, - bool *needUnlink, bool *bypassSecurityDriver); - virQEMUDriverPtr qemu_driver = NULL; @@ -2743,151 +2733,6 @@ qemuCompressGetCommand(virQEMUSaveFormat compression) return ret; } -/* Internal function to properly create or open existing files, with - * ownership affected by qemu driver setup and domain DAC label. */ -static int -qemuOpenFile(virQEMUDriverPtr driver, - virDomainObjPtr vm, - const char *path, int oflags, - bool *needUnlink, bool *bypassSecurityDriver) -{ - int ret = -1; - virQEMUDriverConfigPtr cfg = virQEMUDriverGetConfig(driver); - uid_t user = cfg->user; - gid_t group = cfg->group; - bool dynamicOwnership = cfg->dynamicOwnership; - virSecurityLabelDefPtr seclabel; - - virObjectUnref(cfg); - - /* TODO: Take imagelabel into account? */ - if (vm && - (seclabel = virDomainDefGetSecurityLabelDef(vm->def, "dac")) != NULL && - (virParseOwnershipIds(seclabel->label, &user, &group) < 0)) - goto cleanup; - - ret = qemuOpenFileAs(user, group, dynamicOwnership, - path, oflags, needUnlink, bypassSecurityDriver); - - cleanup: - return ret; -} - -static int -qemuOpenFileAs(uid_t fallback_uid, gid_t fallback_gid, - bool dynamicOwnership, - const char *path, int oflags, - bool *needUnlink, bool *bypassSecurityDriver) -{ - struct stat sb; - bool is_reg = true; - bool need_unlink = false; - bool bypass_security = false; - unsigned int vfoflags = 0; - int fd = -1; - int path_shared = virStorageFileIsSharedFS(path); - uid_t uid = getuid(); - gid_t gid = getgid(); - - /* path might be a pre-existing block dev, in which case - * we need to skip the create step, and also avoid unlink - * in the failure case */ - if (oflags & O_CREAT) { - need_unlink = true; - - /* Don't force chown on network-shared FS - * as it is likely to fail. */ - if (path_shared <= 0 || dynamicOwnership) - vfoflags |= VIR_FILE_OPEN_FORCE_OWNER; - - if (stat(path, &sb) == 0) { - is_reg = !!S_ISREG(sb.st_mode); - /* If the path is regular file which exists - * already and dynamic_ownership is off, we don't - * want to change it's ownership, just open it as-is */ - if (is_reg && !dynamicOwnership) { - uid = sb.st_uid; - gid = sb.st_gid; - } - } - } - - /* First try creating the file as root */ - if (!is_reg) { - if ((fd = open(path, oflags & ~O_CREAT)) < 0) { - fd = -errno; - goto error; - } - } else { - if ((fd = virFileOpenAs(path, oflags, S_IRUSR | S_IWUSR, uid, gid, - vfoflags | VIR_FILE_OPEN_NOFORK)) < 0) { - /* If we failed as root, and the error was permission-denied - (EACCES or EPERM), assume it's on a network-connected share - where root access is restricted (eg, root-squashed NFS). If the - qemu user is non-root, just set a flag to - bypass security driver shenanigans, and retry the operation - after doing setuid to qemu user */ - if ((fd != -EACCES && fd != -EPERM) || fallback_uid == getuid()) - goto error; - - /* On Linux we can also verify the FS-type of the directory. */ - switch (path_shared) { - case 1: - /* it was on a network share, so we'll continue - * as outlined above - */ - break; - - case -1: - virReportSystemError(-fd, oflags & O_CREAT - ? _("Failed to create file " - "'%s': couldn't determine fs type") - : _("Failed to open file " - "'%s': couldn't determine fs type"), - path); - goto cleanup; - - case 0: - default: - /* local file - log the error returned by virFileOpenAs */ - goto error; - } - - /* Retry creating the file as qemu user */ - - if ((fd = virFileOpenAs(path, oflags, - S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP, - fallback_uid, fallback_gid, - vfoflags | VIR_FILE_OPEN_FORK)) < 0) { - virReportSystemError(-fd, oflags & O_CREAT - ? _("Error from child process creating '%s'") - : _("Error from child process opening '%s'"), - path); - goto cleanup; - } - - /* Since we had to setuid to create the file, and the fstype - is NFS, we assume it's a root-squashing NFS share, and that - the security driver stuff would have failed anyway */ - - bypass_security = true; - } - } -cleanup: - if (needUnlink) - *needUnlink = need_unlink; - if (bypassSecurityDriver) - *bypassSecurityDriver = bypass_security; - return fd; - -error: - virReportSystemError(-fd, oflags & O_CREAT - ? _("Failed to create file '%s'") - : _("Failed to open file '%s'"), - path); - goto cleanup; -} - /* Helper function to execute a migration to file with a correct save header * the caller needs to make sure that the processors are stopped and do all other * actions besides saving memory */ -- 1.8.1.5

This is just digging out important implementation from qemu driver's qemuDomainGetDiskBlockInfo() API as this functionality is going to be required in the next patch. Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/qemu/qemu_domain.c | 124 +++++++++++++++++++++++++++++++++++++++++++++++++ src/qemu/qemu_domain.h | 4 ++ src/qemu/qemu_driver.c | 121 +++-------------------------------------------- 3 files changed, 135 insertions(+), 114 deletions(-) diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c index b5770c5..64cd278 100644 --- a/src/qemu/qemu_domain.c +++ b/src/qemu/qemu_domain.c @@ -2501,3 +2501,127 @@ error: path); goto cleanup; } + +int +qemuDomainGetDiskBlockInfo(virQEMUDriverPtr driver, + virDomainObjPtr vm, + virDomainDiskDefPtr disk, + virDomainBlockInfoPtr info) +{ + + int ret = -1; + virStorageFileMetadata *meta = NULL; + virQEMUDriverConfigPtr cfg = NULL; + int format; + struct stat sb; + int fd = -1; + off_t end; + const char *path; + + if (!disk->src) { + virReportError(VIR_ERR_INVALID_ARG, + _("disk %s does not currently have a source assigned"), + disk->info.alias); + goto cleanup; + } + path = disk->src; + + /* The path is correct, now try to open it and get its size. */ + fd = qemuOpenFile(driver, vm, path, O_RDONLY, NULL, NULL); + if (fd == -1) + goto cleanup; + + /* Probe for magic formats */ + if (disk->format) { + format = disk->format; + } else { + if (cfg->allowDiskFormatProbing) { + if ((format = virStorageFileProbeFormat(disk->src, + cfg->user, + cfg->group)) < 0) + goto cleanup; + } else { + virReportError(VIR_ERR_INTERNAL_ERROR, + _("no disk format for %s and probing is disabled"), + disk->src); + goto cleanup; + } + } + + if (!(meta = virStorageFileGetMetadataFromFD(path, fd, format))) + goto cleanup; + + /* Get info for normal formats */ + if (fstat(fd, &sb) < 0) { + virReportSystemError(errno, + _("cannot stat file '%s'"), path); + goto cleanup; + } + + if (S_ISREG(sb.st_mode)) { +#ifndef WIN32 + info->physical = (unsigned long long)sb.st_blocks * + (unsigned long long)DEV_BSIZE; +#else + info->physical = sb.st_size; +#endif + /* Regular files may be sparse, so logical size (capacity) is not same + * as actual physical above + */ + info->capacity = sb.st_size; + } else { + /* NB. Because we configure with AC_SYS_LARGEFILE, off_t should + * be 64 bits on all platforms. + */ + end = lseek(fd, 0, SEEK_END); + if (end == (off_t)-1) { + virReportSystemError(errno, + _("failed to seek to end of %s"), path); + goto cleanup; + } + info->physical = end; + info->capacity = end; + } + + /* If the file we probed has a capacity set, then override + * what we calculated from file/block extents */ + if (meta->capacity) + info->capacity = meta->capacity; + + /* Set default value .. */ + info->allocation = info->physical; + + /* ..but if guest is running & not using raw + disk format and on a block device, then query + highest allocated extent from QEMU */ + if (disk->type == VIR_DOMAIN_DISK_TYPE_BLOCK && + format != VIR_STORAGE_FILE_RAW && + S_ISBLK(sb.st_mode) && + virDomainObjIsActive(vm)) { + qemuDomainObjPrivatePtr priv = vm->privateData; + + if (qemuDomainObjBeginJob(driver, vm, QEMU_JOB_QUERY) < 0) + goto cleanup; + + if (virDomainObjIsActive(vm)) { + qemuDomainObjEnterMonitor(driver, vm); + ret = qemuMonitorGetBlockExtent(priv->mon, + disk->info.alias, + &info->allocation); + qemuDomainObjExitMonitor(driver, vm); + } else { + ret = 0; + } + + if (!qemuDomainObjEndJob(driver, vm)) + vm = NULL; + } else { + ret = 0; + } + +cleanup: + VIR_FORCE_CLOSE(fd); + virStorageFileFreeMetadata(meta); + virObjectUnref(cfg); + return ret; +} diff --git a/src/qemu/qemu_domain.h b/src/qemu/qemu_domain.h index 50987a4..b6e6e33 100644 --- a/src/qemu/qemu_domain.h +++ b/src/qemu/qemu_domain.h @@ -360,6 +360,10 @@ void qemuDomainCleanupRemove(virDomainObjPtr vm, qemuDomainCleanupCallback cb); void qemuDomainCleanupRun(virQEMUDriverPtr driver, virDomainObjPtr vm); +int qemuDomainGetDiskBlockInfo(virQEMUDriverPtr driver, + virDomainObjPtr vm, + virDomainDiskDefPtr disk, + virDomainBlockInfoPtr info); extern virDomainXMLPrivateDataCallbacks virQEMUDriverPrivateDataCallbacks; extern virDomainXMLNamespace virQEMUDriverDomainXMLNamespace; diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c index 5f5d0f1..78058b9 100644 --- a/src/qemu/qemu_driver.c +++ b/src/qemu/qemu_driver.c @@ -9556,29 +9556,23 @@ cleanup: } -static int qemuDomainGetBlockInfo(virDomainPtr dom, - const char *path, - virDomainBlockInfoPtr info, - unsigned int flags) { +static int +qemuDomainGetBlockInfo(virDomainPtr dom, + const char *path, + virDomainBlockInfoPtr info, + unsigned int flags) +{ virQEMUDriverPtr driver = dom->conn->privateData; virDomainObjPtr vm; int ret = -1; - int fd = -1; - off_t end; - virStorageFileMetadata *meta = NULL; virDomainDiskDefPtr disk = NULL; - struct stat sb; int idx; - int format; - virQEMUDriverConfigPtr cfg = NULL; virCheckFlags(0, -1); if (!(vm = qemuDomObjFromDomain(dom))) goto cleanup; - cfg = virQEMUDriverGetConfig(driver); - if (virDomainGetBlockInfoEnsureACL(dom->conn, vm->def) < 0) goto cleanup; @@ -9595,113 +9589,12 @@ static int qemuDomainGetBlockInfo(virDomainPtr dom, goto cleanup; } disk = vm->def->disks[idx]; - if (!disk->src) { - virReportError(VIR_ERR_INVALID_ARG, - _("disk %s does not currently have a source assigned"), - path); - goto cleanup; - } - path = disk->src; - - /* The path is correct, now try to open it and get its size. */ - fd = qemuOpenFile(driver, vm, path, O_RDONLY, NULL, NULL); - if (fd == -1) - goto cleanup; - - /* Probe for magic formats */ - if (disk->format) { - format = disk->format; - } else { - if (cfg->allowDiskFormatProbing) { - if ((format = virStorageFileProbeFormat(disk->src, - cfg->user, - cfg->group)) < 0) - goto cleanup; - } else { - virReportError(VIR_ERR_INTERNAL_ERROR, - _("no disk format for %s and probing is disabled"), - disk->src); - goto cleanup; - } - } - - if (!(meta = virStorageFileGetMetadataFromFD(path, fd, format))) - goto cleanup; - - /* Get info for normal formats */ - if (fstat(fd, &sb) < 0) { - virReportSystemError(errno, - _("cannot stat file '%s'"), path); - goto cleanup; - } - if (S_ISREG(sb.st_mode)) { -#ifndef WIN32 - info->physical = (unsigned long long)sb.st_blocks * - (unsigned long long)DEV_BSIZE; -#else - info->physical = sb.st_size; -#endif - /* Regular files may be sparse, so logical size (capacity) is not same - * as actual physical above - */ - info->capacity = sb.st_size; - } else { - /* NB. Because we configure with AC_SYS_LARGEFILE, off_t should - * be 64 bits on all platforms. - */ - end = lseek(fd, 0, SEEK_END); - if (end == (off_t)-1) { - virReportSystemError(errno, - _("failed to seek to end of %s"), path); - goto cleanup; - } - info->physical = end; - info->capacity = end; - } - - /* If the file we probed has a capacity set, then override - * what we calculated from file/block extents */ - if (meta->capacity) - info->capacity = meta->capacity; - - /* Set default value .. */ - info->allocation = info->physical; - - /* ..but if guest is running & not using raw - disk format and on a block device, then query - highest allocated extent from QEMU */ - if (disk->type == VIR_DOMAIN_DISK_TYPE_BLOCK && - format != VIR_STORAGE_FILE_RAW && - S_ISBLK(sb.st_mode) && - virDomainObjIsActive(vm)) { - qemuDomainObjPrivatePtr priv = vm->privateData; - - if (qemuDomainObjBeginJob(driver, vm, QEMU_JOB_QUERY) < 0) - goto cleanup; - - if (virDomainObjIsActive(vm)) { - qemuDomainObjEnterMonitor(driver, vm); - ret = qemuMonitorGetBlockExtent(priv->mon, - disk->info.alias, - &info->allocation); - qemuDomainObjExitMonitor(driver, vm); - } else { - ret = 0; - } - - if (!qemuDomainObjEndJob(driver, vm)) - vm = NULL; - } else { - ret = 0; - } + ret = qemuDomainGetDiskBlockInfo(driver, vm, disk, info); cleanup: - virStorageFileFreeMetadata(meta); - VIR_FORCE_CLOSE(fd); if (vm) virObjectUnlock(vm); - virObjectUnref(cfg); return ret; } -- 1.8.1.5

With new NBD storage migration approach there are several requirements that need to be meet for successful use of the feature. One of them is - the file representing a disk, needs to have at least same size as on the source. Hence, we must transfer a list of pairs [disk target, size] and check on destination that this requirement is met and/or take actions to meet it. Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/qemu/qemu_migration.c | 286 +++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 281 insertions(+), 5 deletions(-) diff --git a/src/qemu/qemu_migration.c b/src/qemu/qemu_migration.c index 69a9013..5080b0a 100644 --- a/src/qemu/qemu_migration.c +++ b/src/qemu/qemu_migration.c @@ -31,6 +31,9 @@ #endif #include <fcntl.h> #include <poll.h> +#include <unistd.h> +#include <sys/types.h> +#include <sys/stat.h> #include "qemu_migration.h" #include "qemu_monitor.h" @@ -135,6 +138,15 @@ typedef struct _qemuMigrationCookieNBD qemuMigrationCookieNBD; typedef qemuMigrationCookieNBD *qemuMigrationCookieNBDPtr; struct _qemuMigrationCookieNBD { int port; /* on which port does NBD server listen for incoming data */ + + /* The list of pairs [disk, size] (in Bytes). + * This is needed because the same disk size is one of + * prerequisites for NBD storage migration. */ + size_t ndisks; + struct { + char *target; + size_t bytes; + } *disk; }; typedef struct _qemuMigrationCookie qemuMigrationCookie; @@ -197,6 +209,22 @@ qemuMigrationCookieNetworkFree(qemuMigrationCookieNetworkPtr network) } +static void +qemuMigrationCookieNBDFree(qemuMigrationCookieNBDPtr nbd) +{ + size_t i; + + if (!nbd) + return; + + for (i = 0; i < nbd->ndisks; i++) + VIR_FREE(nbd->disk[i].target); + + VIR_FREE(nbd->disk); + VIR_FREE(nbd); +} + + static void qemuMigrationCookieFree(qemuMigrationCookiePtr mig) { if (!mig) @@ -208,12 +236,14 @@ static void qemuMigrationCookieFree(qemuMigrationCookiePtr mig) if (mig->flags & QEMU_MIGRATION_COOKIE_NETWORK) qemuMigrationCookieNetworkFree(mig->network); + if (mig->flags & QEMU_MIGRATION_COOKIE_NBD) + qemuMigrationCookieNBDFree(mig->nbd); + VIR_FREE(mig->localHostname); VIR_FREE(mig->remoteHostname); VIR_FREE(mig->name); VIR_FREE(mig->lockState); VIR_FREE(mig->lockDriver); - VIR_FREE(mig->nbd); VIR_FREE(mig); } @@ -518,20 +548,58 @@ qemuMigrationCookieAddNetwork(qemuMigrationCookiePtr mig, static int qemuMigrationCookieAddNBD(qemuMigrationCookiePtr mig, - virQEMUDriverPtr driver ATTRIBUTE_UNUSED, + virQEMUDriverPtr driver, virDomainObjPtr vm) { qemuDomainObjPrivatePtr priv = vm->privateData; + int ret = -1; + size_t i; /* It is not a bug if there already is a NBD data */ if (!mig->nbd && VIR_ALLOC(mig->nbd) < 0) - return -1; + return ret; + + /* in Begin phase add info about disks */ + if (priv->job.phase == QEMU_MIGRATION_PHASE_BEGIN3 && + vm->def->ndisks) { + if (VIR_ALLOC_N(mig->nbd->disk, vm->def->ndisks) < 0) + goto cleanup; + + for (i = 0; i < vm->def->ndisks; i++) { + virDomainDiskDefPtr disk = vm->def->disks[i]; + virDomainBlockInfo info; + + /* Add only non-shared RW disks with source */ + if (!disk->src || disk->shared || disk->readonly) + continue; + + memset(&info, 0, sizeof(info)); + + if (qemuDomainGetDiskBlockInfo(driver, vm, disk, &info) < 0) + goto cleanup; + + /* Explicitly not checking which formats can be pre-created here, + * as we might be talking to newer libvirt which knows more than we + * do now in here. Just let the destination libvirt decide. */ + if (VIR_STRDUP(mig->nbd->disk[mig->nbd->ndisks].target, disk->dst) < 0) + goto cleanup; + + mig->nbd->disk[mig->nbd->ndisks].bytes = info.capacity; + mig->nbd->ndisks++; + } + } mig->nbd->port = priv->nbdPort; mig->flags |= QEMU_MIGRATION_COOKIE_NBD; - return 0; + ret = 0; + +cleanup: + if (ret < 0) + qemuMigrationCookieNBDFree(mig->nbd); + + return ret; } @@ -641,7 +709,16 @@ qemuMigrationCookieXMLFormat(virQEMUDriverPtr driver, virBufferAddLit(buf, " <nbd"); if (mig->nbd->port) virBufferAsprintf(buf, " port='%d'", mig->nbd->port); - virBufferAddLit(buf, "/>\n"); + if (mig->nbd->ndisks) { + virBufferAddLit(buf, ">\n"); + for (i = 0; i < mig->nbd->ndisks; i++) + virBufferAsprintf(buf, " <disk target='%s' size='%zu'/>\n", + mig->nbd->disk[i].target, + mig->nbd->disk[i].bytes); + virBufferAddLit(buf, " </nbd>\n"); + } else { + virBufferAddLit(buf, "/>\n"); + } } virBufferAddLit(buf, "</qemu-migration>\n"); @@ -941,6 +1018,44 @@ qemuMigrationCookieXMLParse(qemuMigrationCookiePtr mig, goto error; } VIR_FREE(port); + + if ((n = virXPathNodeSet("./nbd/disk", ctxt, &nodes)) > 0) { + xmlNodePtr oldNode = ctxt->node; + if (VIR_ALLOC_N(mig->nbd->disk, n) < 0) { + virReportOOMError(); + goto error; + } + mig->nbd->ndisks = n; + + for (i = 0; i < n; i++) { + ctxt->node = nodes[i]; + + tmp = virXPathString("string(./@target)", ctxt); + if (!tmp) { + virReportError(VIR_ERR_INTERNAL_ERROR, "%s", + _("Malformed target attribute")); + goto error; + } + mig->nbd->disk[i].target = tmp; + /* deliberately don't free @tmp here, as the + * cookie has the reference now and it is + * responsible for freeing it later */ + + tmp = virXPathString("string(./@size)", ctxt); + if (virStrToLong_ull(tmp, NULL, 10, (unsigned long long *) + &mig->nbd->disk[i].bytes) < 0) { + virReportError(VIR_ERR_INTERNAL_ERROR, + _("Malformed size attribute '%s'"), + tmp); + VIR_FREE(tmp); + goto error; + } + VIR_FREE(tmp); + } + VIR_FREE(nodes); + ctxt->node = oldNode; + } + VIR_FREE(port); } virObjectUnref(caps); @@ -1389,6 +1504,163 @@ cleanup: return; } +static int +qemuMigrationPreCreateStorageRaw(virQEMUDriverPtr driver, + virDomainObjPtr vm, + virDomainDiskDefPtr disk, + size_t bytes) +{ + int ret = -1; + struct stat sb; + int fd = -1; + bool need_unlink = false; + + if ((fd = qemuOpenFile(driver, vm, disk->src, O_RDWR | O_CREAT, + &need_unlink, NULL)) < 0) + goto cleanup; + + if (fstat(fd, &sb) < 0) { + virReportSystemError(errno, _("Unable to stat '%s'"), disk->src); + goto cleanup; + } + + VIR_DEBUG("File '%s' is %zuB big. Required %zuB", disk->src, sb.st_size, bytes); + if (sb.st_size != bytes && + posix_fallocate(fd, 0, bytes) < 0) { + virReportSystemError(errno, _("Unable to fallocate '%s'"), disk->src); + goto cleanup; + } + + ret = 0; + +cleanup: + VIR_FORCE_CLOSE(fd); + if (ret < 0 && need_unlink && unlink(disk->src)) + VIR_WARN("Unable to unlink '%s': %d", disk->src, errno); + return ret; +} + +static int +qemuMigrationPreCreateStorageQCOW(virQEMUDriverPtr driver, + virDomainDiskDefPtr disk, + int format, + size_t bytes) +{ + int ret = -1; + const char *imgBinary = qemuFindQemuImgBinary(driver); + virCommandPtr cmd = NULL; + size_t size_arg; + + if (!imgBinary) + return ret; + + /* Size in KB */ + size_arg = VIR_DIV_UP(bytes, 1024); + + cmd = virCommandNewArgList(imgBinary, "create", "-f", + virStorageFileFormatTypeToString(format), + "-o", "preallocation=metadata", + disk->src, NULL); + + virCommandAddArgFormat(cmd, "%zuK", size_arg); + + if (virCommandRun(cmd, NULL) < 0) + goto unlink; + + ret = 0; + +cleanup: + return ret; + +unlink: + if (unlink(disk->src) < 0) + VIR_WARN("Unable to unlink '%s': %d", disk->src, errno); + goto cleanup; +} + +static int +qemuMigrationPreCreateStorage(virQEMUDriverPtr driver, + virDomainObjPtr vm, + qemuMigrationCookiePtr mig) +{ + int ret = -1; + size_t i = 0; + + if (!mig->nbd || !mig->nbd->ndisks) { + /* nothing to do here */ + return 0; + } + + for (i = 0; i < mig->nbd->ndisks; i++) { + virDomainDiskDefPtr disk; + int format; + size_t bytes; + int index; + + index = virDomainDiskIndexByName(vm->def, mig->nbd->disk[i].target, false); + if (index < 0) { + virReportError(VIR_ERR_INTERNAL_ERROR, + _("No such disk '%s"), + mig->nbd->disk[i].target); + goto cleanup; + } + + disk = vm->def->disks[i]; + format = disk->format; + bytes = mig->nbd->disk[i].bytes; + + VIR_DEBUG("Checking '%s' of %s format for its size (requested %zuB)", + disk->src, virStorageFileFormatTypeToString(format), bytes); + switch ((enum virStorageFileFormat) format) { + /* These below we know how to pre-create. */ + case VIR_STORAGE_FILE_RAW: + if (qemuMigrationPreCreateStorageRaw(driver, vm, disk, bytes) < 0) + goto cleanup; + break; + case VIR_STORAGE_FILE_QCOW: + case VIR_STORAGE_FILE_QCOW2: + if (qemuMigrationPreCreateStorageQCOW(driver, disk, + format, bytes) < 0) + goto cleanup; + break; + + /* While these we don't know yet. */ + case VIR_STORAGE_FILE_AUTO_SAFE: + case VIR_STORAGE_FILE_AUTO: + case VIR_STORAGE_FILE_NONE: + case VIR_STORAGE_FILE_DIR: + case VIR_STORAGE_FILE_BOCHS: + case VIR_STORAGE_FILE_CLOOP: + case VIR_STORAGE_FILE_COW: + case VIR_STORAGE_FILE_DMG: + case VIR_STORAGE_FILE_ISO: + case VIR_STORAGE_FILE_QED: + case VIR_STORAGE_FILE_VMDK: + case VIR_STORAGE_FILE_VPC: + case VIR_STORAGE_FILE_FAT: + case VIR_STORAGE_FILE_VHD: + case VIR_STORAGE_FILE_VDI: + case VIR_STORAGE_FILE_LAST: + /* Should we error here? As long as user has no control over which + * disks are copied (currently there is no way specifying only a + * set of disks to copy) we can't error here. What we can do is + * leave users with old migration prerequisite: You (users) are + * responsible for creating the storage on the destination. */ + VIR_WARN("Don't know how to pre-create disks of %s type ('%s')", + virStorageFileFormatTypeToString(format), disk->src); + break; + } + } + + ret = 0; +cleanup: + /* free from migration data to prevent + * infinite sending from src to dst and back */ + VIR_FREE(mig->nbd->disk); + mig->nbd->ndisks = 0; + return ret; +} + /* Validate whether the domain is safe to migrate. If vm is NULL, * then this is being run in the v2 Prepare stage on the destination * (where we only have the target xml); if vm is provided, then this @@ -2305,6 +2577,10 @@ qemuMigrationPrepareAny(virQEMUDriverPtr driver, QEMU_MIGRATION_COOKIE_NBD))) goto cleanup; + /* pre-create all storage */ + if (qemuMigrationPreCreateStorage(driver, vm, mig) < 0) + goto cleanup; + if (qemuMigrationJobStart(driver, vm, QEMU_ASYNC_JOB_MIGRATION_IN) < 0) goto cleanup; qemuMigrationJobSetPhase(driver, vm, QEMU_MIGRATION_PHASE_PREPARE); -- 1.8.1.5

If migratioin fails because of whatever reason and we've pre-created any disks, we should remove them instead of letting them lying around. Moreover, we need to save the disks sources into domain status file in case libvirtd gets restarted. Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/qemu/qemu_domain.c | 29 +++++++++++++++++++++++++++-- src/qemu/qemu_domain.h | 2 ++ src/qemu/qemu_migration.c | 18 ++++++++++++++++++ src/qemu/qemu_process.c | 8 ++++++++ 4 files changed, 55 insertions(+), 2 deletions(-) diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c index 64cd278..9117a68 100644 --- a/src/qemu/qemu_domain.c +++ b/src/qemu/qemu_domain.c @@ -239,11 +239,17 @@ error: static void qemuDomainObjPrivateFree(void *data) { + size_t i; qemuDomainObjPrivatePtr priv = data; virObjectUnref(priv->qemuCaps); virCgroupFree(&priv->cgroup); + + for (i = 0; i < priv->nnbdDisk; i++) + VIR_FREE(priv->nbdDisk[i]); + VIR_FREE(priv->nbdDisk); + qemuDomainPCIAddressSetFree(priv->pciaddrs); qemuDomainCCWAddressSetFree(priv->ccwaddrs); virDomainChrSourceDefFree(priv->monConfig); @@ -275,6 +281,7 @@ qemuDomainObjPrivateXMLFormat(virBufferPtr buf, void *data) qemuDomainObjPrivatePtr priv = data; const char *monitorpath; enum qemuDomainJob job; + size_t i; /* priv->monitor_chr is set only for qemu */ if (priv->monConfig) { @@ -297,7 +304,6 @@ qemuDomainObjPrivateXMLFormat(virBufferPtr buf, void *data) if (priv->nvcpupids) { - size_t i; virBufferAddLit(buf, " <vcpus>\n"); for (i = 0; i < priv->nvcpupids; i++) { virBufferAsprintf(buf, " <vcpu pid='%d'/>\n", priv->vcpupids[i]); @@ -306,7 +312,6 @@ qemuDomainObjPrivateXMLFormat(virBufferPtr buf, void *data) } if (priv->qemuCaps) { - size_t i; virBufferAddLit(buf, " <qemuCaps>\n"); for (i = 0; i < QEMU_CAPS_LAST; i++) { if (virQEMUCapsGet(priv->qemuCaps, i)) { @@ -350,6 +355,13 @@ qemuDomainObjPrivateXMLFormat(virBufferPtr buf, void *data) virBufferAddLit(buf, " </devices>\n"); } + if (priv->nnbdDisk) { + virBufferAddLit(buf, " <nbdDisk>\n"); + for (i = 0; i < priv->nnbdDisk; i++) + virBufferAsprintf(buf, " <disk src='%s'/>\n", priv->nbdDisk[i]); + virBufferAddLit(buf, " </nbdDisk>\n"); + } + return 0; } @@ -511,6 +523,19 @@ qemuDomainObjPrivateXMLParse(xmlXPathContextPtr ctxt, void *data) } VIR_FREE(nodes); + n = virXPathNodeSet("./nbdDisk/disk", ctxt, &nodes); + if (n < 0) + goto error; + if (n) { + if (VIR_REALLOC_N(priv->nbdDisk, n) < 0) + goto error; + priv->nnbdDisk = n; + + for (i = 0; i < n; i++) + if (!(priv->nbdDisk[i] = virXMLPropString(nodes[i], "src"))) + goto error; + } + return 0; error: diff --git a/src/qemu/qemu_domain.h b/src/qemu/qemu_domain.h index b6e6e33..ff872c2 100644 --- a/src/qemu/qemu_domain.h +++ b/src/qemu/qemu_domain.h @@ -160,6 +160,8 @@ struct _qemuDomainObjPrivate { unsigned long migMaxBandwidth; char *origname; int nbdPort; /* Port used for migration with NBD */ + size_t nnbdDisk; + char **nbdDisk; /* src of disks we want to transfer via NBD */ virChrdevsPtr devs; diff --git a/src/qemu/qemu_migration.c b/src/qemu/qemu_migration.c index 5080b0a..931853e 100644 --- a/src/qemu/qemu_migration.c +++ b/src/qemu/qemu_migration.c @@ -1585,6 +1585,7 @@ qemuMigrationPreCreateStorage(virQEMUDriverPtr driver, { int ret = -1; size_t i = 0; + qemuDomainObjPrivatePtr priv = vm->privateData; if (!mig->nbd || !mig->nbd->ndisks) { /* nothing to do here */ @@ -1596,6 +1597,7 @@ qemuMigrationPreCreateStorage(virQEMUDriverPtr driver, int format; size_t bytes; int index; + char *path = NULL; index = virDomainDiskIndexByName(vm->def, mig->nbd->disk[i].target, false); if (index < 0) { @@ -1616,12 +1618,14 @@ qemuMigrationPreCreateStorage(virQEMUDriverPtr driver, case VIR_STORAGE_FILE_RAW: if (qemuMigrationPreCreateStorageRaw(driver, vm, disk, bytes) < 0) goto cleanup; + path = disk->src; break; case VIR_STORAGE_FILE_QCOW: case VIR_STORAGE_FILE_QCOW2: if (qemuMigrationPreCreateStorageQCOW(driver, disk, format, bytes) < 0) goto cleanup; + path = disk->src; break; /* While these we don't know yet. */ @@ -1650,6 +1654,13 @@ qemuMigrationPreCreateStorage(virQEMUDriverPtr driver, virStorageFileFormatTypeToString(format), disk->src); break; } + + if (path) { + if (VIR_REALLOC_N(priv->nbdDisk, priv->nnbdDisk + 1) < 0 || + VIR_STRDUP(priv->nbdDisk[priv->nnbdDisk], path) < 0) + goto cleanup; + priv->nnbdDisk++; + } } ret = 0; @@ -4594,6 +4605,7 @@ qemuMigrationFinish(virQEMUDriverPtr driver, qemuDomainObjPrivatePtr priv = vm->privateData; virQEMUDriverConfigPtr cfg = virQEMUDriverGetConfig(driver); virCapsPtr caps = NULL; + size_t i; VIR_DEBUG("driver=%p, dconn=%p, vm=%p, cookiein=%s, cookieinlen=%d, " "cookieout=%p, cookieoutlen=%p, flags=%lx, retcode=%d", @@ -4747,6 +4759,12 @@ qemuMigrationFinish(virQEMUDriverPtr driver, VIR_DOMAIN_EVENT_SUSPENDED, VIR_DOMAIN_EVENT_SUSPENDED_PAUSED); } + + /* Free disks transferred via NBD so they don't get deleted */ + for (i = 0; i < priv->nnbdDisk; i++) + VIR_FREE(priv->nbdDisk[i]); + VIR_FREE(priv->nbdDisk); + priv->nnbdDisk = 0; } if (virDomainObjIsActive(vm) && diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c index c7cec5a..34c0292 100644 --- a/src/qemu/qemu_process.c +++ b/src/qemu/qemu_process.c @@ -4167,6 +4167,14 @@ void qemuProcessStop(virQEMUDriverPtr driver, virPortAllocatorRelease(driver->remotePorts, priv->nbdPort); priv->nbdPort = 0; + for (i = 0; i < priv->nnbdDisk; i++) { + VIR_DEBUG("Unlinking %s due to unsuccessful NBD transfer", + priv->nbdDisk[i]); + if (unlink(priv->nbdDisk[i]) < 0) + virReportSystemError(errno, _("Unable to unlink %s"), + priv->nbdDisk[i]); + } + if (priv->agent) { qemuAgentClose(priv->agent); priv->agent = NULL; -- 1.8.1.5
participants (1)
-
Michal Privoznik