On Mon, Feb 18, 2013 at 15:38:42 +0100, Michal Privoznik wrote:
With new NBD storage migration approach there are several
requirements that need to be meet for successful use of the
s/meet/met/
feature. One of them is - the file representing a disk, needs to
have at least same size as on the source. Hence, we must transfer
a list of pairs [disk target, size] and check on destination that
this requirement is met and/or take actions to meet it.
---
src/qemu/qemu_migration.c | 302 +++++++++++++++++++++++++++++++++++++++++++---
1 file changed, 288 insertions(+), 14 deletions(-)
diff --git a/src/qemu/qemu_migration.c b/src/qemu/qemu_migration.c
index e2c7804..9b38a0c 100644
--- a/src/qemu/qemu_migration.c
+++ b/src/qemu/qemu_migration.c
...
@@ -525,22 +555,71 @@
qemuMigrationCookieAddNetwork(qemuMigrationCookiePtr mig,
static int
qemuMigrationCookieAddNBD(qemuMigrationCookiePtr mig,
- virQEMUDriverPtr driver ATTRIBUTE_UNUSED,
+ virQEMUDriverPtr driver,
virDomainObjPtr vm)
{
qemuDomainObjPrivatePtr priv = vm->privateData;
+ int ret = -1;
+ size_t i;
/* It is not a bug if there already is a NBD data */
if (!mig->nbd &&
VIR_ALLOC(mig->nbd) < 0) {
virReportOOMError();
- return -1;
+ return ret;
+ }
+
+ /* in Begin phase add info about disks */
+ if (priv->job.phase == QEMU_MIGRATION_PHASE_BEGIN3 &&
+ vm->def->ndisks) {
+ if (VIR_ALLOC_N(mig->nbd->disk, vm->def->ndisks) < 0) {
+ virReportOOMError();
+ goto cleanup;
+ }
+
+ for (i = 0; i < vm->def->ndisks; i++) {
+ virDomainDiskDefPtr disk = vm->def->disks[i];
+ virDomainBlockInfo info;
+ int format = disk->format;
+
+ /* Add only non-shared RW disks with source */
+ if (!disk->src || disk->shared || disk->readonly)
+ continue;
+
+ memset(&info, 0, sizeof(info));
+
+ if (qemuDomainGetDiskBlockInfo(driver, vm, disk, &info) < 0)
+ goto cleanup;
Hmm, I see solving the need to enter a job for getting disk block info
won't be exactly easy.
+
+ if (format != VIR_STORAGE_FILE_RAW &&
+ format != VIR_STORAGE_FILE_QCOW &&
+ format != VIR_STORAGE_FILE_QCOW2) {
+ virReportError(VIR_ERR_OPERATION_UNSUPPORTED,
+ _("Cannot pre-create storage of %s format"),
+ virStorageFileFormatTypeToString(format));
+ goto cleanup;
+ }
+
+ if (!(mig->nbd->disk[mig->nbd->ndisks].target =
strdup(disk->dst))) {
+ virReportOOMError();
+ goto cleanup;
+ }
+
+ mig->nbd->disk[mig->nbd->ndisks].bytes = info.capacity;
+ mig->nbd->ndisks++;
+ }
}
mig->nbd->port = priv->nbdPort;
mig->flags |= QEMU_MIGRATION_COOKIE_NBD;
- return 0;
+ ret = 0;
+
+cleanup:
+ if (ret < 0)
+ qemuMigrationCookieNBDFree(mig->nbd);
+
+ return ret;
}
...
@@ -946,20 +1034,56 @@
qemuMigrationCookieXMLParse(qemuMigrationCookiePtr mig,
virXPathBoolean("boolean(./nbd)", ctxt)) {
char *port;
+ if (VIR_ALLOC(mig->nbd) < 0) {
+ virReportOOMError();
+ goto error;
+ }
+
port = virXPathString("string(./nbd/@port)", ctxt);
- if (port) {
- if (VIR_ALLOC(mig->nbd) < 0) {
+ if (port && virStrToLong_i(port, NULL, 10, &mig->nbd->port)
< 0) {
+ VIR_FREE(port);
+ virReportError(VIR_ERR_INTERNAL_ERROR,
+ _("Malformed nbd port '%s'"),
+ port);
Oops, I believe you wanted to swap these two lines :-)
+ goto error;
+ }
+ VIR_FREE(port);
+
+ if ((n = virXPathNodeSet("./nbd/disk", ctxt, &nodes)) > 0) {
+ xmlNodePtr oldNode = ctxt->node;
+ if (VIR_ALLOC_N(mig->nbd->disk, n) < 0) {
virReportOOMError();
goto error;
}
- if (virStrToLong_i(port, NULL, 10, &mig->nbd->port) < 0) {
- VIR_FREE(port);
- virReportError(VIR_ERR_INTERNAL_ERROR,
- _("Malformed nbd port '%s'"),
- port);
- goto error;
+ mig->nbd->ndisks = n;
+
+ for (i = 0; i < n; i++) {
+ ctxt->node = nodes[i];
When accessing just attributes, it's usually easier to just call
virXMLPropString(nodes[i], "name") instead of using XPath machinery.
+
+ tmp = virXPathString("string(./@target)", ctxt);
+ if (!tmp) {
+ virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
+ _("Malformed target attribute"));
I think this should rather be "Malformed NBD disk target attribute".
+ goto error;
+ }
+ mig->nbd->disk[i].target = tmp;
+ /* deliberately don't free @tmp here, as the
+ * cookie has the reference now and it is
+ * responsible for freeing it later */
+
+ tmp = virXPathString("string(./@size)", ctxt);
+ if (virStrToLong_ull(tmp, NULL, 10, (unsigned long long *)
+ &mig->nbd->disk[i].bytes) < 0) {
+ virReportError(VIR_ERR_INTERNAL_ERROR,
+ _("Malformed size attribute '%s'"),
You should mention NBD disk here as well.
+ tmp);
+ VIR_FREE(tmp);
+ goto error;
+ }
+ VIR_FREE(tmp);
}
- VIR_FREE(port);
+ VIR_FREE(nodes);
+ ctxt->node = oldNode;
}
}
@@ -1352,6 +1476,152 @@ error:
goto cleanup;
}
+static int
+qemuMigrationPreCreateStorageRaw(virDomainDiskDefPtr disk,
+ size_t bytes)
+{
+ int ret = -1;
+ struct stat sb;
+ int fd = -1;
+
+ if ((fd = virFileOpenAs(disk->src, O_RDWR | O_CREAT, 0660,
+ -1, -1, VIR_FILE_OPEN_NOFORK)) < 0) {
+ virReportSystemError(errno, _("Unable to create '%s'"),
disk->src);
+ goto cleanup;
+ }
+
+ if (fstat(fd, &sb) < 0) {
+ virReportSystemError(errno, _("Unable to stat '%s'"),
disk->src);
+ goto unlink;
+ }
+
+ VIR_DEBUG("File '%s' is %zuB big. Required %zuB", disk->src,
sb.st_size, bytes);
+ if (sb.st_size < bytes) {
+ if (ftruncate(fd, bytes) < 0) {
+ virReportSystemError(errno, _("Unable to ftruncate '%s'"),
disk->src);
+ goto unlink;
+ }
Shouldn't we rather call posix_fallocate or something similar?
+
+ if (fsync(fd) < 0) {
+ ret = -errno;
No need to pass errno to the caller.
+ virReportSystemError(errno, _("cannot sync data to
file '%s'"),
+ disk->src);
+ goto unlink;
+ }
+ }
+
+ ret = 0;
+
+cleanup:
+ VIR_FORCE_CLOSE(fd);
You should really call VIR_CLOSE(fd) here and check for errors.
+ return ret;
+
+unlink:
+ if (unlink(disk->src) < 0)
+ VIR_WARN("Unable to unlink '%s': %d", disk->src, errno);
+ goto cleanup;
What if the file was already there but it was just too small and making
it larger failed. I don't think you want to unlink it in this case.
+}
+
+static int
+qemuMigrationPreCreateStorageQCOW(virQEMUDriverPtr driver,
+ virDomainDiskDefPtr disk,
+ int format,
+ size_t bytes)
+{
+ int ret = -1;
+ const char *imgBinary = qemuFindQemuImgBinary(driver);
+ virCommandPtr cmd = NULL;
+ size_t size_arg;
+
+ if (!imgBinary)
+ return ret;
+
+ /* Size in KB */
+ size_arg = VIR_DIV_UP(bytes, 1024);
+
+ cmd = virCommandNewArgList(imgBinary, "create", "-f",
+ virStorageFileFormatTypeToString(format),
+ "-o", "preallocation=metadata",
+ disk->src, NULL);
Wrong indentation.
+
+ virCommandAddArgFormat(cmd, "%zuK", size_arg);
Any particular reason why we need to round up the size when we can just
use bytes?
virCommandAddArgFormat(cmd, "%zu", bytes);
+
+ if (virCommandRun(cmd, NULL) < 0)
+ goto unlink;
What happens if the QCOW is already there?
> +
> + ret = 0;
> +
> +cleanup:
+ return ret;
+
+unlink:
+ if (unlink(disk->src) < 0)
+ VIR_WARN("Unable to unlink '%s': %d", disk->src, errno);
+ goto cleanup;
> +}
Anyway, I feel like all this storage creation code should be placed in a
storage driver somewhere.
+
+static int
+qemuMigrationPreCreateStorage(virQEMUDriverPtr driver,
+ virDomainObjPtr vm,
+ qemuMigrationCookiePtr mig)
+{
+ int ret = -1;
+ size_t i = 0;
+
+ if (!mig->nbd || !mig->nbd->ndisks) {
+ /* nothing to do here */
+ return 0;
+ }
+
+ for (i = 0; i < mig->nbd->ndisks; i++) {
+ virDomainDiskDefPtr disk;
+ int format;
+ size_t bytes;
+ int index;
This fails to compile:
error: declaration of 'index' shadows a global declaration
+
+ index = virDomainDiskIndexByName(vm->def, mig->nbd->disk[i].target,
false);
+ if (index < 0) {
+ virReportError(VIR_ERR_INTERNAL_ERROR,
+ _("No such disk '%s"),
+ mig->nbd->disk[i].target);
+ goto cleanup;
+ }
+
+ disk = vm->def->disks[i];
As John already said, I think you want vm->def->disks[index] here.
+ format = disk->format;
+ bytes = mig->nbd->disk[i].bytes;
+
+ VIR_DEBUG("Checking '%s' of %s format for its size (requested
%zuB)",
+ disk->src, virStorageFileFormatTypeToString(format), bytes);
+ switch (format) {
+ case VIR_STORAGE_FILE_RAW:
+ if (qemuMigrationPreCreateStorageRaw(disk, bytes) < 0)
+ goto cleanup;
+ break;
+ case VIR_STORAGE_FILE_QCOW:
+ case VIR_STORAGE_FILE_QCOW2:
+ if (qemuMigrationPreCreateStorageQCOW(driver, disk,
+ format, bytes) < 0)
+ goto cleanup;
+ break;
+ default:
+ virReportError(VIR_ERR_OPERATION_UNSUPPORTED,
+ _("Cannot pre-create disks of %s format"),
+ virStorageFileFormatTypeToString(format));
+ goto cleanup;
+ }
+
+ }
+
+ ret = 0;
+cleanup:
+ /* free from migration data to prevent
+ * infinite sending from src to dst and back */
+ VIR_FREE(mig->nbd->disk);
This leaks all disk targets.
+ mig->nbd->ndisks = 0;
+ return ret;
+}
+
/* Validate whether the domain is safe to migrate. If vm is NULL,
* then this is being run in the v2 Prepare stage on the destination
* (where we only have the target xml); if vm is provided, then this
@@ -2014,6 +2284,10 @@ qemuMigrationPrepareAny(virQEMUDriverPtr driver,
QEMU_MIGRATION_COOKIE_NBD)))
goto cleanup;
+ /* pre-create all storage */
+ if (qemuMigrationPreCreateStorage(driver, vm, mig) < 0)
+ goto cleanup;
+
What happens if the disks we are pre-creating exist and are used by
another domain on the destination host? :-P Normally, migration would
fail if locking manager is in use but with your code, we'd just happily
overwrite all disks, right? :-) I'm afraid we should deal with lock
manager somehow to avoid doing that :-/
if (qemuMigrationJobStart(driver, vm,
QEMU_ASYNC_JOB_MIGRATION_IN) < 0)
goto cleanup;
qemuMigrationJobSetPhase(driver, vm, QEMU_MIGRATION_PHASE_PREPARE);
Jirka