
On Mon, Feb 18, 2013 at 15:38:42 +0100, Michal Privoznik wrote:
With new NBD storage migration approach there are several requirements that need to be meet for successful use of the
s/meet/met/
feature. One of them is - the file representing a disk, needs to have at least same size as on the source. Hence, we must transfer a list of pairs [disk target, size] and check on destination that this requirement is met and/or take actions to meet it. --- src/qemu/qemu_migration.c | 302 +++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 288 insertions(+), 14 deletions(-)
diff --git a/src/qemu/qemu_migration.c b/src/qemu/qemu_migration.c index e2c7804..9b38a0c 100644 --- a/src/qemu/qemu_migration.c +++ b/src/qemu/qemu_migration.c ... @@ -525,22 +555,71 @@ qemuMigrationCookieAddNetwork(qemuMigrationCookiePtr mig,
static int qemuMigrationCookieAddNBD(qemuMigrationCookiePtr mig, - virQEMUDriverPtr driver ATTRIBUTE_UNUSED, + virQEMUDriverPtr driver, virDomainObjPtr vm) { qemuDomainObjPrivatePtr priv = vm->privateData; + int ret = -1; + size_t i;
/* It is not a bug if there already is a NBD data */ if (!mig->nbd && VIR_ALLOC(mig->nbd) < 0) { virReportOOMError(); - return -1; + return ret; + } + + /* in Begin phase add info about disks */ + if (priv->job.phase == QEMU_MIGRATION_PHASE_BEGIN3 && + vm->def->ndisks) { + if (VIR_ALLOC_N(mig->nbd->disk, vm->def->ndisks) < 0) { + virReportOOMError(); + goto cleanup; + } + + for (i = 0; i < vm->def->ndisks; i++) { + virDomainDiskDefPtr disk = vm->def->disks[i]; + virDomainBlockInfo info; + int format = disk->format; + + /* Add only non-shared RW disks with source */ + if (!disk->src || disk->shared || disk->readonly) + continue; + + memset(&info, 0, sizeof(info)); + + if (qemuDomainGetDiskBlockInfo(driver, vm, disk, &info) < 0) + goto cleanup;
Hmm, I see solving the need to enter a job for getting disk block info won't be exactly easy.
+ + if (format != VIR_STORAGE_FILE_RAW && + format != VIR_STORAGE_FILE_QCOW && + format != VIR_STORAGE_FILE_QCOW2) { + virReportError(VIR_ERR_OPERATION_UNSUPPORTED, + _("Cannot pre-create storage of %s format"), + virStorageFileFormatTypeToString(format)); + goto cleanup; + } + + if (!(mig->nbd->disk[mig->nbd->ndisks].target = strdup(disk->dst))) { + virReportOOMError(); + goto cleanup; + } + + mig->nbd->disk[mig->nbd->ndisks].bytes = info.capacity; + mig->nbd->ndisks++; + } }
mig->nbd->port = priv->nbdPort; mig->flags |= QEMU_MIGRATION_COOKIE_NBD;
- return 0; + ret = 0; + +cleanup: + if (ret < 0) + qemuMigrationCookieNBDFree(mig->nbd); + + return ret; }
...
@@ -946,20 +1034,56 @@ qemuMigrationCookieXMLParse(qemuMigrationCookiePtr mig, virXPathBoolean("boolean(./nbd)", ctxt)) { char *port;
+ if (VIR_ALLOC(mig->nbd) < 0) { + virReportOOMError(); + goto error; + } + port = virXPathString("string(./nbd/@port)", ctxt); - if (port) { - if (VIR_ALLOC(mig->nbd) < 0) { + if (port && virStrToLong_i(port, NULL, 10, &mig->nbd->port) < 0) { + VIR_FREE(port); + virReportError(VIR_ERR_INTERNAL_ERROR, + _("Malformed nbd port '%s'"), + port);
Oops, I believe you wanted to swap these two lines :-)
+ goto error; + } + VIR_FREE(port); + + if ((n = virXPathNodeSet("./nbd/disk", ctxt, &nodes)) > 0) { + xmlNodePtr oldNode = ctxt->node; + if (VIR_ALLOC_N(mig->nbd->disk, n) < 0) { virReportOOMError(); goto error; } - if (virStrToLong_i(port, NULL, 10, &mig->nbd->port) < 0) { - VIR_FREE(port); - virReportError(VIR_ERR_INTERNAL_ERROR, - _("Malformed nbd port '%s'"), - port); - goto error; + mig->nbd->ndisks = n; + + for (i = 0; i < n; i++) { + ctxt->node = nodes[i];
When accessing just attributes, it's usually easier to just call virXMLPropString(nodes[i], "name") instead of using XPath machinery.
+ + tmp = virXPathString("string(./@target)", ctxt); + if (!tmp) { + virReportError(VIR_ERR_INTERNAL_ERROR, "%s", + _("Malformed target attribute"));
I think this should rather be "Malformed NBD disk target attribute".
+ goto error; + } + mig->nbd->disk[i].target = tmp; + /* deliberately don't free @tmp here, as the + * cookie has the reference now and it is + * responsible for freeing it later */ + + tmp = virXPathString("string(./@size)", ctxt); + if (virStrToLong_ull(tmp, NULL, 10, (unsigned long long *) + &mig->nbd->disk[i].bytes) < 0) { + virReportError(VIR_ERR_INTERNAL_ERROR, + _("Malformed size attribute '%s'"),
You should mention NBD disk here as well.
+ tmp); + VIR_FREE(tmp); + goto error; + } + VIR_FREE(tmp); } - VIR_FREE(port); + VIR_FREE(nodes); + ctxt->node = oldNode; } }
@@ -1352,6 +1476,152 @@ error: goto cleanup; }
+static int +qemuMigrationPreCreateStorageRaw(virDomainDiskDefPtr disk, + size_t bytes) +{ + int ret = -1; + struct stat sb; + int fd = -1; + + if ((fd = virFileOpenAs(disk->src, O_RDWR | O_CREAT, 0660, + -1, -1, VIR_FILE_OPEN_NOFORK)) < 0) { + virReportSystemError(errno, _("Unable to create '%s'"), disk->src); + goto cleanup; + } + + if (fstat(fd, &sb) < 0) { + virReportSystemError(errno, _("Unable to stat '%s'"), disk->src); + goto unlink; + } + + VIR_DEBUG("File '%s' is %zuB big. Required %zuB", disk->src, sb.st_size, bytes); + if (sb.st_size < bytes) { + if (ftruncate(fd, bytes) < 0) { + virReportSystemError(errno, _("Unable to ftruncate '%s'"), disk->src); + goto unlink; + }
Shouldn't we rather call posix_fallocate or something similar?
+ + if (fsync(fd) < 0) { + ret = -errno;
No need to pass errno to the caller.
+ virReportSystemError(errno, _("cannot sync data to file '%s'"), + disk->src); + goto unlink; + } + } + + ret = 0; + +cleanup: + VIR_FORCE_CLOSE(fd);
You should really call VIR_CLOSE(fd) here and check for errors.
+ return ret; + +unlink: + if (unlink(disk->src) < 0) + VIR_WARN("Unable to unlink '%s': %d", disk->src, errno); + goto cleanup;
What if the file was already there but it was just too small and making it larger failed. I don't think you want to unlink it in this case.
+} + +static int +qemuMigrationPreCreateStorageQCOW(virQEMUDriverPtr driver, + virDomainDiskDefPtr disk, + int format, + size_t bytes) +{ + int ret = -1; + const char *imgBinary = qemuFindQemuImgBinary(driver); + virCommandPtr cmd = NULL; + size_t size_arg; + + if (!imgBinary) + return ret; + + /* Size in KB */ + size_arg = VIR_DIV_UP(bytes, 1024); + + cmd = virCommandNewArgList(imgBinary, "create", "-f", + virStorageFileFormatTypeToString(format), + "-o", "preallocation=metadata", + disk->src, NULL);
Wrong indentation.
+ + virCommandAddArgFormat(cmd, "%zuK", size_arg);
Any particular reason why we need to round up the size when we can just use bytes? virCommandAddArgFormat(cmd, "%zu", bytes);
+ + if (virCommandRun(cmd, NULL) < 0) + goto unlink;
What happens if the QCOW is already there?
+ + ret = 0; + +cleanup: + return ret; + +unlink: + if (unlink(disk->src) < 0) + VIR_WARN("Unable to unlink '%s': %d", disk->src, errno); + goto cleanup; +}
Anyway, I feel like all this storage creation code should be placed in a storage driver somewhere.
+ +static int +qemuMigrationPreCreateStorage(virQEMUDriverPtr driver, + virDomainObjPtr vm, + qemuMigrationCookiePtr mig) +{ + int ret = -1; + size_t i = 0; + + if (!mig->nbd || !mig->nbd->ndisks) { + /* nothing to do here */ + return 0; + } + + for (i = 0; i < mig->nbd->ndisks; i++) { + virDomainDiskDefPtr disk; + int format; + size_t bytes; + int index;
This fails to compile: error: declaration of 'index' shadows a global declaration
+ + index = virDomainDiskIndexByName(vm->def, mig->nbd->disk[i].target, false); + if (index < 0) { + virReportError(VIR_ERR_INTERNAL_ERROR, + _("No such disk '%s"), + mig->nbd->disk[i].target); + goto cleanup; + } + + disk = vm->def->disks[i];
As John already said, I think you want vm->def->disks[index] here.
+ format = disk->format; + bytes = mig->nbd->disk[i].bytes; + + VIR_DEBUG("Checking '%s' of %s format for its size (requested %zuB)", + disk->src, virStorageFileFormatTypeToString(format), bytes); + switch (format) { + case VIR_STORAGE_FILE_RAW: + if (qemuMigrationPreCreateStorageRaw(disk, bytes) < 0) + goto cleanup; + break; + case VIR_STORAGE_FILE_QCOW: + case VIR_STORAGE_FILE_QCOW2: + if (qemuMigrationPreCreateStorageQCOW(driver, disk, + format, bytes) < 0) + goto cleanup; + break; + default: + virReportError(VIR_ERR_OPERATION_UNSUPPORTED, + _("Cannot pre-create disks of %s format"), + virStorageFileFormatTypeToString(format)); + goto cleanup; + } + + } + + ret = 0; +cleanup: + /* free from migration data to prevent + * infinite sending from src to dst and back */ + VIR_FREE(mig->nbd->disk);
This leaks all disk targets.
+ mig->nbd->ndisks = 0; + return ret; +} + /* Validate whether the domain is safe to migrate. If vm is NULL, * then this is being run in the v2 Prepare stage on the destination * (where we only have the target xml); if vm is provided, then this @@ -2014,6 +2284,10 @@ qemuMigrationPrepareAny(virQEMUDriverPtr driver, QEMU_MIGRATION_COOKIE_NBD))) goto cleanup;
+ /* pre-create all storage */ + if (qemuMigrationPreCreateStorage(driver, vm, mig) < 0) + goto cleanup; +
What happens if the disks we are pre-creating exist and are used by another domain on the destination host? :-P Normally, migration would fail if locking manager is in use but with your code, we'd just happily overwrite all disks, right? :-) I'm afraid we should deal with lock manager somehow to avoid doing that :-/
if (qemuMigrationJobStart(driver, vm, QEMU_ASYNC_JOB_MIGRATION_IN) < 0) goto cleanup; qemuMigrationJobSetPhase(driver, vm, QEMU_MIGRATION_PHASE_PREPARE);
Jirka