[libvirt] [PATCH] qemu: fix fail to start vm with one cell use memdev and another not

https://bugzilla.redhat.com/show_bug.cgi?id=1182467 We changed our build numa qemu cmd line way in f309db, But qemu cannot accept one cell use memdev and another not, so this cause a issue when we start a vm with one cell have hugpage or nodemask settings and others not. After this patch we will use memdev for all cells if there is at least one cell need this. Signed-off-by: Luyao Huang <lhuang@redhat.com> --- src/qemu/qemu_command.c | 88 +++++++++++++--------- .../qemuxml2argv-numatune-memnode-no-memory.args | 3 +- 2 files changed, 55 insertions(+), 36 deletions(-) diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c index 3cafbfb..11ca355 100644 --- a/src/qemu/qemu_command.c +++ b/src/qemu/qemu_command.c @@ -6613,14 +6613,18 @@ qemuBuildNumaArgStr(virQEMUDriverConfigPtr cfg, virBitmapPtr nodeset) { size_t i, j; - virBuffer buf = VIR_BUFFER_INITIALIZER; + virBuffer buf[def->cpu->ncells]; virDomainHugePagePtr master_hugepage = NULL; char *cpumask = NULL, *tmpmask = NULL, *next = NULL; char *nodemask = NULL; char *mem_path = NULL; int ret = -1; + bool need_memdev = false; const long system_page_size = sysconf(_SC_PAGESIZE) / 1024; + for (i = 0; i < def->cpu->ncells; i++) + memset(&buf[i], 0, sizeof(buf[i])); + if (virDomainNumatuneHasPerNodeBinding(def->numatune) && !(virQEMUCapsGet(qemuCaps, QEMU_CAPS_OBJECT_MEMORY_RAM) || virQEMUCapsGet(qemuCaps, QEMU_CAPS_OBJECT_MEMORY_FILE))) { @@ -6672,20 +6676,8 @@ qemuBuildNumaArgStr(virQEMUDriverConfigPtr cfg, def->cpu->cells[i].mem = cellmem * 1024; virMemAccess memAccess = def->cpu->cells[i].memAccess; - VIR_FREE(cpumask); VIR_FREE(nodemask); - if (!(cpumask = virBitmapFormat(def->cpu->cells[i].cpumask))) - goto cleanup; - - if (strchr(cpumask, ',') && - !virQEMUCapsGet(qemuCaps, QEMU_CAPS_NUMA)) { - virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", - _("disjoint NUMA cpu ranges are not supported " - "with this QEMU")); - goto cleanup; - } - if (virQEMUCapsGet(qemuCaps, QEMU_CAPS_OBJECT_MEMORY_RAM) || virQEMUCapsGet(qemuCaps, QEMU_CAPS_OBJECT_MEMORY_FILE)) { virDomainNumatuneMemMode mode; @@ -6752,17 +6744,17 @@ qemuBuildNumaArgStr(virQEMUDriverConfigPtr cfg, if (!(mem_path = qemuGetHugepagePath(&cfg->hugetlbfs[j]))) goto cleanup; - virBufferAsprintf(&buf, + virBufferAsprintf(&buf[i], "memory-backend-file,prealloc=yes,mem-path=%s", mem_path); switch (memAccess) { case VIR_MEM_ACCESS_SHARED: - virBufferAddLit(&buf, ",share=on"); + virBufferAddLit(&buf[i], ",share=on"); break; case VIR_MEM_ACCESS_PRIVATE: - virBufferAddLit(&buf, ",share=off"); + virBufferAddLit(&buf[i], ",share=off"); break; case VIR_MEM_ACCESS_DEFAULT: @@ -6777,10 +6769,10 @@ qemuBuildNumaArgStr(virQEMUDriverConfigPtr cfg, "only with hugepages")); goto cleanup; } - virBufferAddLit(&buf, "memory-backend-ram"); + virBufferAddLit(&buf[i], "memory-backend-ram"); } - virBufferAsprintf(&buf, ",size=%lluM,id=ram-node%zu", cellmem, i); + virBufferAsprintf(&buf[i], ",size=%lluM,id=ram-node%zu", cellmem, i); if (virDomainNumatuneMaybeFormatNodeset(def->numatune, nodeset, &nodemask, i) < 0) @@ -6798,19 +6790,15 @@ qemuBuildNumaArgStr(virQEMUDriverConfigPtr cfg, for (tmpmask = nodemask; tmpmask; tmpmask = next) { if ((next = strchr(tmpmask, ','))) *(next++) = '\0'; - virBufferAddLit(&buf, ",host-nodes="); - virBufferAdd(&buf, tmpmask, -1); + virBufferAddLit(&buf[i], ",host-nodes="); + virBufferAdd(&buf[i], tmpmask, -1); } - virBufferAsprintf(&buf, ",policy=%s", policy); + virBufferAsprintf(&buf[i], ",policy=%s", policy); } - if (hugepage || nodemask) { - virCommandAddArg(cmd, "-object"); - virCommandAddArgBuffer(cmd, &buf); - } else { - virBufferFreeAndReset(&buf); - } + if (hugepage || nodemask) + need_memdev = true; } else { if (memAccess) { virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", @@ -6819,23 +6807,52 @@ qemuBuildNumaArgStr(virQEMUDriverConfigPtr cfg, goto cleanup; } } + } + + /*We have finnished build args for all cells, and know + *if we can use memdev or no need use it. If there is + *one cells have hugepage or nodemask settings, we need + *build all cells with -object memory-backend-ram, if not + *just use -numa.*/ + for (i = 0; i < def->cpu->ncells; i++) { + unsigned long long cellmem = VIR_DIV_UP(def->cpu->cells[i].mem, 1024); + def->cpu->cells[i].mem = cellmem * 1024; + VIR_FREE(cpumask); + + if (need_memdev) { + virCommandAddArg(cmd, "-object"); + virCommandAddArgBuffer(cmd, &buf[i]); + } else { + virBufferFreeAndReset(&buf[i]); + } virCommandAddArg(cmd, "-numa"); - virBufferAsprintf(&buf, "node,nodeid=%zu", i); + virBufferAsprintf(&buf[i], "node,nodeid=%zu", i); + + if (!(cpumask = virBitmapFormat(def->cpu->cells[i].cpumask))) + goto cleanup; + + if (strchr(cpumask, ',') && + !virQEMUCapsGet(qemuCaps, QEMU_CAPS_NUMA)) { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", + _("disjoint NUMA cpu ranges are not supported " + "with this QEMU")); + goto cleanup; + } for (tmpmask = cpumask; tmpmask; tmpmask = next) { if ((next = strchr(tmpmask, ','))) *(next++) = '\0'; - virBufferAddLit(&buf, ",cpus="); - virBufferAdd(&buf, tmpmask, -1); + virBufferAddLit(&buf[i], ",cpus="); + virBufferAdd(&buf[i], tmpmask, -1); } - if (hugepage || nodemask) - virBufferAsprintf(&buf, ",memdev=ram-node%zu", i); + if (need_memdev) + virBufferAsprintf(&buf[i], ",memdev=ram-node%zu", i); else - virBufferAsprintf(&buf, ",mem=%llu", cellmem); + virBufferAsprintf(&buf[i], ",mem=%llu", cellmem); - virCommandAddArgBuffer(cmd, &buf); + virCommandAddArgBuffer(cmd, &buf[i]); } ret = 0; @@ -6843,7 +6860,8 @@ qemuBuildNumaArgStr(virQEMUDriverConfigPtr cfg, VIR_FREE(cpumask); VIR_FREE(nodemask); VIR_FREE(mem_path); - virBufferFreeAndReset(&buf); + for (i = 0; i < def->cpu->ncells; i++) + virBufferFreeAndReset(&buf[i]); return ret; } diff --git a/tests/qemuxml2argvdata/qemuxml2argv-numatune-memnode-no-memory.args b/tests/qemuxml2argvdata/qemuxml2argv-numatune-memnode-no-memory.args index 2addf97..b0e274c 100644 --- a/tests/qemuxml2argvdata/qemuxml2argv-numatune-memnode-no-memory.args +++ b/tests/qemuxml2argvdata/qemuxml2argv-numatune-memnode-no-memory.args @@ -2,6 +2,7 @@ LC_ALL=C PATH=/bin HOME=/home/test USER=test LOGNAME=test QEMU_AUDIO_DRV=none \ /usr/bin/kvm -S -M pc -m 64 -smp 2 \ -object memory-backend-ram,size=32M,id=ram-node0,host-nodes=3,policy=preferred \ -numa node,nodeid=0,cpus=0,memdev=ram-node0 \ --numa node,nodeid=1,cpus=1,mem=32 \ +-object memory-backend-ram,size=32M,id=ram-node1 \ +-numa node,nodeid=1,cpus=1,memdev=ram-node1 \ -nographic -monitor unix:/tmp/test-monitor,server,nowait \ -no-acpi -boot c -usb -net none -serial none -parallel none -- 1.8.3.1

On Fri, Jan 16, 2015 at 10:12:49 +0800, Luyao Huang wrote:
https://bugzilla.redhat.com/show_bug.cgi?id=1182467
We changed our build numa qemu cmd line way in f309db, But qemu cannot accept one cell use memdev and another not, so this cause a issue when we start a vm with one cell have hugpage or nodemask settings and others not.
After this patch we will use memdev for all cells if there is at least one cell need this.
Signed-off-by: Luyao Huang <lhuang@redhat.com> --- src/qemu/qemu_command.c | 88 +++++++++++++--------- .../qemuxml2argv-numatune-memnode-no-memory.args | 3 +- 2 files changed, 55 insertions(+), 36 deletions(-)
I'm currently refactoring this piece of code and this patch would be mostly reverted and reworked in the end, thus I'm not going to commit it and I'll fix the bug in the new code after I'm finished with the refactors. Thanks for the patch though. Peter

On 01/16/2015 04:48 PM, Peter Krempa wrote:
On Fri, Jan 16, 2015 at 10:12:49 +0800, Luyao Huang wrote:
https://bugzilla.redhat.com/show_bug.cgi?id=1182467
We changed our build numa qemu cmd line way in f309db, But qemu cannot accept one cell use memdev and another not, so this cause a issue when we start a vm with one cell have hugpage or nodemask settings and others not.
After this patch we will use memdev for all cells if there is at least one cell need this.
Signed-off-by: Luyao Huang <lhuang@redhat.com> --- src/qemu/qemu_command.c | 88 +++++++++++++--------- .../qemuxml2argv-numatune-memnode-no-memory.args | 3 +- 2 files changed, 55 insertions(+), 36 deletions(-)
I'm currently refactoring this piece of code and this patch would be mostly reverted and reworked in the end, thus I'm not going to commit it and I'll fix the bug in the new code after I'm finished with the refactors.
Thanks for the patch though.
No problem, i am looking forward your patch:)
Peter
Luyao

On 16.01.2015 03:12, Luyao Huang wrote:
https://bugzilla.redhat.com/show_bug.cgi?id=1182467
We changed our build numa qemu cmd line way in f309db, But qemu cannot accept one cell use memdev and another not, so this cause a issue when we start a vm with one cell have hugpage or nodemask settings and others not.
After this patch we will use memdev for all cells if there is at least one cell need this.
Signed-off-by: Luyao Huang <lhuang@redhat.com> --- src/qemu/qemu_command.c | 88 +++++++++++++--------- .../qemuxml2argv-numatune-memnode-no-memory.args | 3 +- 2 files changed, 55 insertions(+), 36 deletions(-)
While I agree the bug you are trying to fix is real bug, the patch look a bit messy. Moreover, as far as I know Peter is doing work in this area and he's willing to fix the bug. You know, he's rewriting the area from scratch (or something like that), so I don't really want to cause a rebase conflict to him. Moreover, some tests fail after this patch, so I will not merge this for now, sorry. Michal

On 01/16/2015 04:54 PM, Michal Privoznik wrote:
On 16.01.2015 03:12, Luyao Huang wrote:
https://bugzilla.redhat.com/show_bug.cgi?id=1182467
We changed our build numa qemu cmd line way in f309db, But qemu cannot accept one cell use memdev and another not, so this cause a issue when we start a vm with one cell have hugpage or nodemask settings and others not.
After this patch we will use memdev for all cells if there is at least one cell need this.
Signed-off-by: Luyao Huang <lhuang@redhat.com> --- src/qemu/qemu_command.c | 88 +++++++++++++--------- .../qemuxml2argv-numatune-memnode-no-memory.args | 3 +- 2 files changed, 55 insertions(+), 36 deletions(-) While I agree the bug you are trying to fix is real bug, the patch look a bit messy. Moreover, as far as I know Peter is doing work in this area and he's willing to fix the bug. You know, he's rewriting the area from scratch (or something like that), so I don't really want to cause a rebase conflict to him. Moreover, some tests fail after this patch, so I will not merge this for now, sorry.
Nothing, I just don't know Peter is working on it. About patch look a bit messy, i noticed this when i wrote, but i cannot found a good way to fix it without rewrite this place. And thanks a lot for your reply!
Michal
Luyao

On 16.01.2015 10:49, lhuang wrote:
On 01/16/2015 04:54 PM, Michal Privoznik wrote:
On 16.01.2015 03:12, Luyao Huang wrote:
https://bugzilla.redhat.com/show_bug.cgi?id=1182467
We changed our build numa qemu cmd line way in f309db, But qemu cannot accept one cell use memdev and another not, so this cause a issue when we start a vm with one cell have hugpage or nodemask settings and others not.
After this patch we will use memdev for all cells if there is at least one cell need this.
Signed-off-by: Luyao Huang <lhuang@redhat.com> --- src/qemu/qemu_command.c | 88 +++++++++++++--------- .../qemuxml2argv-numatune-memnode-no-memory.args | 3 +- 2 files changed, 55 insertions(+), 36 deletions(-) While I agree the bug you are trying to fix is real bug, the patch look a bit messy. Moreover, as far as I know Peter is doing work in this area and he's willing to fix the bug. You know, he's rewriting the area from scratch (or something like that), so I don't really want to cause a rebase conflict to him. Moreover, some tests fail after this patch, so I will not merge this for now, sorry.
Nothing, I just don't know Peter is working on it. About patch look a bit messy, i noticed this when i wrote, but i cannot found a good way to fix it without rewrite this place.
Yeah, sometimes it's better to split the bug fix into two patches (or even more). In the first part you reformat the code. Then, in the second part of the patchset you just fix the bug. It's a bit harder to backport, but easier to maintain the upstream. Michal
participants (4)
-
lhuang
-
Luyao Huang
-
Michal Privoznik
-
Peter Krempa