[libvirt] [PATCH v3 00/17] Introduce NVDIMM support

NVDIMMs are new type of ultra fast storage that's plugged into DIMM slot and can hold the stored info throughout reboots. After all, NV stands for non-volatile. In virtualization world, this has an awesome advantage - less VM_EXITs on 'disk' IO. https://nvdimm.wiki.kernel.org/ How to test this feature? $ > /tmp/nvdimm $ truncate --size 512M /tmp/nvdimm $ virsh edit $mydom <memory model='nvdimm' access='shared'> <source> <path>/tmp/nvdimm</path> </source> <target> <size unit='MiB'>512</size> <node>0</node> <label> <size unit='KiB'>128</size> </label> </target> <address type='dimm' slot='0'/> </memory> $ virsh start $mydom $ ssh $mydom "echo \"Hello world\" > /dev/pmem0" $ hexdump -C /tmp/nvdimm 00000000 48 65 6c 6c 6f 20 77 6f 72 6c 64 0a 00 00 00 00 |Hello world.....| 00000010 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 |................| * 20000000 diff to v2: - John's review points worked in - Couple of more patches to clean up qemuBuildMemoryBackendStr Michal Privoznik (17): qemuBuildMemoryBackendStr: Don't overwrite @force qemuBuildMemoryBackendStr: Check for @memAccess properly qemuBuildMemoryBackendStr: Pass virDomainMemoryDefPtr qemuBuildMemoryBackendStr: Reorder args and update comment Introduce NVDIMM memory model qemu: Introduce QEMU_CAPS_DEVICE_NVDIMM qemu: Implement NVDIMM conf: Introduce @access to <memory/> qemu: Implement @access for <memory/> banks qemu: Introduce label-size for NVDIMMs security_dac: Label host side of NVDIMM security_selinux: Label host side of NVDIMM security: Introduce internal APIs for memdev labelling secdrivers: Implement memdev relabel APIs qemu_hotplug: Relabel memdev qemu: Allow nvdimm in devices CGroups qemu: Namespaces for NVDIMM docs/formatdomain.html.in | 83 ++++++++-- docs/schemas/domaincommon.rng | 47 ++++-- src/conf/domain_conf.c | 146 +++++++++++++---- src/conf/domain_conf.h | 5 + src/libvirt_private.syms | 2 + src/qemu/qemu_alias.c | 10 +- src/qemu/qemu_capabilities.c | 9 ++ src/qemu/qemu_capabilities.h | 1 + src/qemu/qemu_cgroup.c | 49 ++++++ src/qemu/qemu_cgroup.h | 4 + src/qemu/qemu_command.c | 172 +++++++++++++-------- src/qemu/qemu_command.h | 15 +- src/qemu/qemu_domain.c | 113 +++++++++++++- src/qemu/qemu_domain.h | 8 + src/qemu/qemu_hotplug.c | 40 ++++- src/qemu/qemu_security.c | 56 +++++++ src/qemu/qemu_security.h | 8 + src/security/security_dac.c | 76 +++++++++ src/security/security_driver.h | 9 ++ src/security/security_manager.c | 56 +++++++ src/security/security_manager.h | 7 + src/security/security_nop.c | 19 +++ src/security/security_selinux.c | 69 +++++++++ src/security/security_stack.c | 38 +++++ .../qemuxml2argv-memory-hotplug-nvdimm-access.args | 26 ++++ .../qemuxml2argv-memory-hotplug-nvdimm-access.xml | 56 +++++++ .../qemuxml2argv-memory-hotplug-nvdimm-label.args | 26 ++++ .../qemuxml2argv-memory-hotplug-nvdimm-label.xml | 59 +++++++ .../qemuxml2argv-memory-hotplug-nvdimm.args | 26 ++++ .../qemuxml2argv-memory-hotplug-nvdimm.xml | 56 +++++++ tests/qemuxml2argvtest.c | 8 +- ...qemuxml2xmlout-memory-hotplug-nvdimm-access.xml | 1 + .../qemuxml2xmlout-memory-hotplug-nvdimm-label.xml | 1 + .../qemuxml2xmlout-memory-hotplug-nvdimm.xml | 1 + tests/qemuxml2xmltest.c | 3 + 35 files changed, 1164 insertions(+), 141 deletions(-) create mode 100644 tests/qemuxml2argvdata/qemuxml2argv-memory-hotplug-nvdimm-access.args create mode 100644 tests/qemuxml2argvdata/qemuxml2argv-memory-hotplug-nvdimm-access.xml create mode 100644 tests/qemuxml2argvdata/qemuxml2argv-memory-hotplug-nvdimm-label.args create mode 100644 tests/qemuxml2argvdata/qemuxml2argv-memory-hotplug-nvdimm-label.xml create mode 100644 tests/qemuxml2argvdata/qemuxml2argv-memory-hotplug-nvdimm.args create mode 100644 tests/qemuxml2argvdata/qemuxml2argv-memory-hotplug-nvdimm.xml create mode 120000 tests/qemuxml2xmloutdata/qemuxml2xmlout-memory-hotplug-nvdimm-access.xml create mode 120000 tests/qemuxml2xmloutdata/qemuxml2xmlout-memory-hotplug-nvdimm-label.xml create mode 120000 tests/qemuxml2xmloutdata/qemuxml2xmlout-memory-hotplug-nvdimm.xml -- 2.11.0

This is an input argument. We should not overwrite it. Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/qemu/qemu_command.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c index 6545a9325..f145cf9d8 100644 --- a/src/qemu/qemu_command.c +++ b/src/qemu/qemu_command.c @@ -3203,7 +3203,6 @@ qemuBuildMemoryBackendStr(unsigned long long size, if (def->mem.source == VIR_DOMAIN_MEMORY_SOURCE_FILE) { /* we can have both pagesize and mem source, then check mem source first */ - force = true; if (virJSONValueObjectAdd(props, "s:mem-path", cfg->memoryBackingDir, NULL) < 0) @@ -3273,7 +3272,8 @@ qemuBuildMemoryBackendStr(unsigned long long size, } /* If none of the following is requested... */ - if (!needHugepage && !userNodeset && !memAccess && !nodeSpecified && !force) { + if (!needHugepage && !userNodeset && !memAccess && !nodeSpecified && + def->mem.source != VIR_DOMAIN_MEMORY_SOURCE_FILE && !force) { /* report back that using the new backend is not necessary * to achieve the desired configuration */ ret = 1; -- 2.11.0

On 03/09/2017 11:06 AM, Michal Privoznik wrote:
This is an input argument. We should not overwrite it.
Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/qemu/qemu_command.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-)
ACK John

Even though this variable contains just values from an enum where zero has the usual meaning, it's enum after all and we should check it as such. Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/qemu/qemu_command.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c index f145cf9d8..62d3a0a4e 100644 --- a/src/qemu/qemu_command.c +++ b/src/qemu/qemu_command.c @@ -3272,7 +3272,8 @@ qemuBuildMemoryBackendStr(unsigned long long size, } /* If none of the following is requested... */ - if (!needHugepage && !userNodeset && !memAccess && !nodeSpecified && + if (!needHugepage && !userNodeset && !nodeSpecified && + memAccess == VIR_DOMAIN_MEMORY_ACCESS_DEFAULT && def->mem.source != VIR_DOMAIN_MEMORY_SOURCE_FILE && !force) { /* report back that using the new backend is not necessary * to achieve the desired configuration */ -- 2.11.0

On 03/09/2017 11:06 AM, Michal Privoznik wrote:
Even though this variable contains just values from an enum where zero has the usual meaning, it's enum after all and we should check it as such.
Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/qemu/qemu_command.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-)
ACK John

Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/qemu/qemu_command.c | 49 ++++++++++++++++++++++--------------------------- src/qemu/qemu_command.h | 5 +---- src/qemu/qemu_hotplug.c | 6 ++---- 3 files changed, 25 insertions(+), 35 deletions(-) diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c index 62d3a0a4e..cbe3e3492 100644 --- a/src/qemu/qemu_command.c +++ b/src/qemu/qemu_command.c @@ -3078,11 +3078,7 @@ qemuBuildControllerDevCommandLine(virCommandPtr cmd, /** * qemuBuildMemoryBackendStr: - * @size: size of the memory device in kibibytes - * @pagesize: size of the requested memory page in KiB, 0 for default - * @guestNode: NUMA node in the guest that the memory object will be attached - * to, or -1 if NUMA is not used in the guest - * @hostNodes: map of host nodes to alloc the memory in, NULL for default + * @mem: memory definition object * @autoNodeset: fallback nodeset in case of automatic numa placement * @def: domain definition object * @qemuCaps: qemu capabilities object @@ -3100,10 +3096,7 @@ qemuBuildControllerDevCommandLine(virCommandPtr cmd, * -1 in case of an error. */ int -qemuBuildMemoryBackendStr(unsigned long long size, - unsigned long long pagesize, - int guestNode, - virBitmapPtr userNodeset, +qemuBuildMemoryBackendStr(virDomainMemoryDefPtr mem, virBitmapPtr autoNodeset, virDomainDefPtr def, virQEMUCapsPtr qemuCaps, @@ -3122,26 +3115,27 @@ qemuBuildMemoryBackendStr(unsigned long long size, virBitmapPtr nodemask = NULL; int ret = -1; virJSONValuePtr props = NULL; - bool nodeSpecified = virDomainNumatuneNodeSpecified(def->numa, guestNode); + bool nodeSpecified = virDomainNumatuneNodeSpecified(def->numa, mem->targetNode); + unsigned long long pagesize = mem->pagesize; bool needHugepage = !!pagesize; *backendProps = NULL; *backendType = NULL; - if (guestNode >= 0) { + if (mem->targetNode >= 0) { /* memory devices could provide a invalid guest node */ - if (guestNode >= virDomainNumaGetNodeCount(def->numa)) { + if (mem->targetNode >= virDomainNumaGetNodeCount(def->numa)) { virReportError(VIR_ERR_CONFIG_UNSUPPORTED, _("can't add memory backend for guest node '%d' as " "the guest has only '%zu' NUMA nodes configured"), - guestNode, virDomainNumaGetNodeCount(def->numa)); + mem->targetNode, virDomainNumaGetNodeCount(def->numa)); return -1; } - memAccess = virDomainNumaGetNodeMemoryAccessMode(def->numa, guestNode); + memAccess = virDomainNumaGetNodeMemoryAccessMode(def->numa, mem->targetNode); } - if (virDomainNumatuneGetMode(def->numa, guestNode, &mode) < 0 && + if (virDomainNumatuneGetMode(def->numa, mem->targetNode, &mode) < 0 && virDomainNumatuneGetMode(def->numa, -1, &mode) < 0) mode = VIR_DOMAIN_NUMATUNE_MEM_STRICT; @@ -3158,10 +3152,10 @@ qemuBuildMemoryBackendStr(unsigned long long size, } /* just find the master hugepage in case we don't use NUMA */ - if (guestNode < 0) + if (mem->targetNode < 0) continue; - if (virBitmapGetBit(hugepage->nodemask, guestNode, + if (virBitmapGetBit(hugepage->nodemask, mem->targetNode, &thisHugepage) < 0) { /* Ignore this error. It's not an error after all. Well, * the nodemask for this <page/> can contain lower NUMA @@ -3250,14 +3244,14 @@ qemuBuildMemoryBackendStr(unsigned long long size, *backendType = "memory-backend-ram"; } - if (virJSONValueObjectAdd(props, "U:size", size * 1024, NULL) < 0) + if (virJSONValueObjectAdd(props, "U:size", mem->size * 1024, NULL) < 0) goto cleanup; - if (userNodeset) { - nodemask = userNodeset; + if (mem->sourceNodes) { + nodemask = mem->sourceNodes; } else { if (virDomainNumatuneMaybeGetNodeset(def->numa, autoNodeset, - &nodemask, guestNode) < 0) + &nodemask, mem->targetNode) < 0) goto cleanup; } @@ -3272,7 +3266,7 @@ qemuBuildMemoryBackendStr(unsigned long long size, } /* If none of the following is requested... */ - if (!needHugepage && !userNodeset && !nodeSpecified && + if (!needHugepage && !mem->sourceNodes && !nodeSpecified && memAccess == VIR_DOMAIN_MEMORY_ACCESS_DEFAULT && def->mem.source != VIR_DOMAIN_MEMORY_SOURCE_FILE && !force) { /* report back that using the new backend is not necessary @@ -3321,17 +3315,19 @@ qemuBuildMemoryCellBackendStr(virDomainDefPtr def, const char *backendType; int ret = -1; int rc; + virDomainMemoryDef mem = { 0 }; unsigned long long memsize = virDomainNumaGetNodeMemorySize(def->numa, cell); *backendStr = NULL; + mem.size = memsize; + mem.targetNode = cell; if (virAsprintf(&alias, "ram-node%zu", cell) < 0) goto cleanup; - if ((rc = qemuBuildMemoryBackendStr(memsize, 0, cell, NULL, auto_nodeset, - def, qemuCaps, cfg, &backendType, - &props, false)) < 0) + if ((rc = qemuBuildMemoryBackendStr(&mem, auto_nodeset, def, qemuCaps, + cfg, &backendType, &props, false)) < 0) goto cleanup; if (!(*backendStr = virQEMUBuildObjectCommandlineFromJSON(backendType, @@ -3370,8 +3366,7 @@ qemuBuildMemoryDimmBackendStr(virDomainMemoryDefPtr mem, if (virAsprintf(&alias, "mem%s", mem->info.alias) < 0) goto cleanup; - if (qemuBuildMemoryBackendStr(mem->size, mem->pagesize, - mem->targetNode, mem->sourceNodes, auto_nodeset, + if (qemuBuildMemoryBackendStr(mem, auto_nodeset, def, qemuCaps, cfg, &backendType, &props, true) < 0) goto cleanup; diff --git a/src/qemu/qemu_command.h b/src/qemu/qemu_command.h index 69fe84613..9b2b81f55 100644 --- a/src/qemu/qemu_command.h +++ b/src/qemu/qemu_command.h @@ -124,10 +124,7 @@ char *qemuBuildControllerDevStr(const virDomainDef *domainDef, virQEMUCapsPtr qemuCaps, int *nusbcontroller); -int qemuBuildMemoryBackendStr(unsigned long long size, - unsigned long long pagesize, - int guestNode, - virBitmapPtr userNodeset, +int qemuBuildMemoryBackendStr(virDomainMemoryDefPtr mem, virBitmapPtr autoNodeset, virDomainDefPtr def, virQEMUCapsPtr qemuCaps, diff --git a/src/qemu/qemu_hotplug.c b/src/qemu/qemu_hotplug.c index 0e4af830f..39710db19 100644 --- a/src/qemu/qemu_hotplug.c +++ b/src/qemu/qemu_hotplug.c @@ -2240,10 +2240,8 @@ qemuDomainAttachMemory(virQEMUDriverPtr driver, if (!(devstr = qemuBuildMemoryDeviceStr(mem))) goto cleanup; - if (qemuBuildMemoryBackendStr(mem->size, mem->pagesize, - mem->targetNode, mem->sourceNodes, NULL, - vm->def, priv->qemuCaps, cfg, - &backendType, &props, true) < 0) + if (qemuBuildMemoryBackendStr(mem, NULL, vm->def, priv->qemuCaps, + cfg, &backendType, &props, true) < 0) goto cleanup; if (virDomainMemoryInsert(vm->def, mem) < 0) { -- 2.11.0

On 03/09/2017 11:06 AM, Michal Privoznik wrote:
Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/qemu/qemu_command.c | 49 ++++++++++++++++++++++--------------------------- src/qemu/qemu_command.h | 5 +---- src/qemu/qemu_hotplug.c | 6 ++---- 3 files changed, 25 insertions(+), 35 deletions(-)
ACK John

Frankly, this function is one big mess. A lot of arguments, complicated behaviour. It's really surprising that arguments were in random order (input and output arguments were mixed together), the documentation was outdated, the description of return values was bogus. Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/qemu/qemu_command.c | 54 +++++++++++++++++++++++++++---------------------- src/qemu/qemu_command.h | 12 +++++------ src/qemu/qemu_hotplug.c | 4 ++-- 3 files changed, 38 insertions(+), 32 deletions(-) diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c index cbe3e3492..0cca99567 100644 --- a/src/qemu/qemu_command.c +++ b/src/qemu/qemu_command.c @@ -3078,31 +3078,38 @@ qemuBuildControllerDevCommandLine(virCommandPtr cmd, /** * qemuBuildMemoryBackendStr: - * @mem: memory definition object - * @autoNodeset: fallback nodeset in case of automatic numa placement - * @def: domain definition object - * @qemuCaps: qemu capabilities object + * @backendProps: [out] constructed object + * @backendType: [out] type of the backennd used * @cfg: qemu driver config object - * @aliasPrefix: prefix string of the alias (to allow for multiple frontents) - * @id: index of the device (to construct the alias) - * @backendStr: returns the object string + * @qemuCaps: qemu capabilities object + * @def: domain definition object + * @mem: memory definition object + * @autoNodeset: fallback nodeset in case of automatic NUMA placement + * @force: forcibly use one of the backends * - * Formats the configuration string for the memory device backend according - * to the configuration. @pagesize and @hostNodes can be used to override the - * default source configuration, both are optional. + * Creates a configuration object that represents memory backend of given guest + * NUMA node (domain @def and @mem). Use @autoNodeset to fine tune the + * placement of the memory on the host NUMA nodes. * - * Returns 0 on success, 1 if only the implicit memory-device-ram with no - * other configuration was used (to detect legacy configurations). Returns - * -1 in case of an error. + * By default, if no memory-backend-* object is necessary to fulfil the guest + * configuration value of 1 is returned. This behaviour can be suppressed by + * setting @force to true in which case 0 would be returned. + * + * Then, if one of the two memory-backend-* should be used, the @qemuCaps is + * consulted to check if qemu does support it. + * + * Returns: 0 on success, + * 1 on success and if there's no need to use memory-backend-* + * -1 on error. */ int -qemuBuildMemoryBackendStr(virDomainMemoryDefPtr mem, - virBitmapPtr autoNodeset, - virDomainDefPtr def, - virQEMUCapsPtr qemuCaps, - virQEMUDriverConfigPtr cfg, +qemuBuildMemoryBackendStr(virJSONValuePtr *backendProps, const char **backendType, - virJSONValuePtr *backendProps, + virQEMUDriverConfigPtr cfg, + virQEMUCapsPtr qemuCaps, + virDomainDefPtr def, + virDomainMemoryDefPtr mem, + virBitmapPtr autoNodeset, bool force) { virDomainHugePagePtr master_hugepage = NULL; @@ -3326,8 +3333,8 @@ qemuBuildMemoryCellBackendStr(virDomainDefPtr def, if (virAsprintf(&alias, "ram-node%zu", cell) < 0) goto cleanup; - if ((rc = qemuBuildMemoryBackendStr(&mem, auto_nodeset, def, qemuCaps, - cfg, &backendType, &props, false)) < 0) + if ((rc = qemuBuildMemoryBackendStr(&props, &backendType, cfg, qemuCaps, + def, &mem, auto_nodeset, false)) < 0) goto cleanup; if (!(*backendStr = virQEMUBuildObjectCommandlineFromJSON(backendType, @@ -3366,9 +3373,8 @@ qemuBuildMemoryDimmBackendStr(virDomainMemoryDefPtr mem, if (virAsprintf(&alias, "mem%s", mem->info.alias) < 0) goto cleanup; - if (qemuBuildMemoryBackendStr(mem, auto_nodeset, - def, qemuCaps, cfg, - &backendType, &props, true) < 0) + if (qemuBuildMemoryBackendStr(&props, &backendType, cfg, qemuCaps, + def, mem, auto_nodeset, true) < 0) goto cleanup; ret = virQEMUBuildObjectCommandlineFromJSON(backendType, alias, props); diff --git a/src/qemu/qemu_command.h b/src/qemu/qemu_command.h index 9b2b81f55..f3ed9e7e4 100644 --- a/src/qemu/qemu_command.h +++ b/src/qemu/qemu_command.h @@ -124,13 +124,13 @@ char *qemuBuildControllerDevStr(const virDomainDef *domainDef, virQEMUCapsPtr qemuCaps, int *nusbcontroller); -int qemuBuildMemoryBackendStr(virDomainMemoryDefPtr mem, - virBitmapPtr autoNodeset, - virDomainDefPtr def, - virQEMUCapsPtr qemuCaps, - virQEMUDriverConfigPtr cfg, +int qemuBuildMemoryBackendStr(virJSONValuePtr *backendProps, const char **backendType, - virJSONValuePtr *backendProps, + virQEMUDriverConfigPtr cfg, + virQEMUCapsPtr qemuCaps, + virDomainDefPtr def, + virDomainMemoryDefPtr mem, + virBitmapPtr autoNodeset, bool force); char *qemuBuildMemoryDeviceStr(virDomainMemoryDefPtr mem); diff --git a/src/qemu/qemu_hotplug.c b/src/qemu/qemu_hotplug.c index 39710db19..4e416b12e 100644 --- a/src/qemu/qemu_hotplug.c +++ b/src/qemu/qemu_hotplug.c @@ -2240,8 +2240,8 @@ qemuDomainAttachMemory(virQEMUDriverPtr driver, if (!(devstr = qemuBuildMemoryDeviceStr(mem))) goto cleanup; - if (qemuBuildMemoryBackendStr(mem, NULL, vm->def, priv->qemuCaps, - cfg, &backendType, &props, true) < 0) + if (qemuBuildMemoryBackendStr(&props, &backendType, cfg, + priv->qemuCaps, vm->def, mem, NULL, true) < 0) goto cleanup; if (virDomainMemoryInsert(vm->def, mem) < 0) { -- 2.11.0

On 03/09/2017 11:06 AM, Michal Privoznik wrote:
Frankly, this function is one big mess. A lot of arguments, complicated behaviour. It's really surprising that arguments were in random order (input and output arguments were mixed together), the documentation was outdated, the description of return values was bogus.
Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/qemu/qemu_command.c | 54 +++++++++++++++++++++++++++---------------------- src/qemu/qemu_command.h | 12 +++++------ src/qemu/qemu_hotplug.c | 4 ++-- 3 files changed, 38 insertions(+), 32 deletions(-)
ACK John

NVDIMM is new type of memory introduced into QEMU 2.6. The idea is that we have a Non-Volatile memory module that keeps the data persistent across domain reboots. At the domain XML level, we already have some representation of 'dimm' modules. Long story short, we have <memory/> element that lives under <devices/>. Now, the element even has @model attribute which we can use to introduce new memory type: <memory model='nvdimm'> <source> <path>/tmp/nvdimm</path> </source> <target> <size unit='KiB'>523264</size> <node>0</node> </target> <address type='dimm' slot='0'/> </memory> So far, this is just a XML parser/formatter extension. QEMU driver implementation is in the next commit. For more info on NVDIMM visit the following web page: http://pmem.io/ Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- docs/formatdomain.html.in | 56 +++++++++---- docs/schemas/domaincommon.rng | 32 ++++--- src/conf/domain_conf.c | 97 ++++++++++++++++------ src/conf/domain_conf.h | 2 + src/qemu/qemu_command.c | 6 ++ src/qemu/qemu_domain.c | 5 ++ .../qemuxml2argv-memory-hotplug-nvdimm.xml | 56 +++++++++++++ .../qemuxml2xmlout-memory-hotplug-nvdimm.xml | 1 + tests/qemuxml2xmltest.c | 1 + 9 files changed, 205 insertions(+), 51 deletions(-) create mode 100644 tests/qemuxml2argvdata/qemuxml2argv-memory-hotplug-nvdimm.xml create mode 120000 tests/qemuxml2xmloutdata/qemuxml2xmlout-memory-hotplug-nvdimm.xml diff --git a/docs/formatdomain.html.in b/docs/formatdomain.html.in index 3095111c4..6e89bfe3a 100644 --- a/docs/formatdomain.html.in +++ b/docs/formatdomain.html.in @@ -7091,7 +7091,6 @@ qemu-kvm -net nic,model=? /dev/null guests' memory resource needs. Some hypervisors may require NUMA configured for the guest. - <span class="since">Since 1.2.14</span> </p> <p> @@ -7116,6 +7115,15 @@ qemu-kvm -net nic,model=? /dev/null <node>1</node> </target> </memory> + <memory model='nvdimm'> + <source> + <path>/tmp/nvdimm</path> + </source> + <target> + <size unit='KiB'>524288</size> + <node>1</node> + </target> + </memory> </devices> ... </pre> @@ -7123,28 +7131,48 @@ qemu-kvm -net nic,model=? /dev/null <dt><code>model</code></dt> <dd> <p> - Currently only the <code>dimm</code> model is supported in order to - add a virtual DIMM module to the guest. + Provide <code>dimm</code> to add a virtual DIMM module to the guest. + <span class="since">Since 1.2.14</span> + Provide <code>nvdimm</code> model adds a Non-Volatile DIMM + module. <span class="since">Since 3.2.0</span> </p> </dd> <dt><code>source</code></dt> <dd> <p> - The optional source element allows to fine tune the source of the - memory used for the given memory device. If the element is not - provided defaults configured via <code>numatune</code> are used. + For model <code>dimm</code> this element is optional and allows to + fine tune the source of the memory used for the given memory device. + If the element is not provided defaults configured via + <code>numatune</code> are used. If <code>dimm</code> is provided, + then the following optional elements can be provided as well: </p> - <p> - <code>pagesize</code> can optionally be used to override the default - host page size used for backing the memory device. - The configured value must correspond to a page size supported by the - host. - </p> + <dl> + <dt><code>pagesize</code></dt> + <dd> + <p> + This element can be used to override the default + host page size used for backing the memory device. + The configured value must correspond to a page size + supported by the host. + </p> + </dd> + + <dt><code>nodemask</code></dt> + <dd> + <p> + This element can be used to override the default + set of NUMA nodes where the memory would be + allocated. + </p> + </dd> + </dl> + <p> - <code>nodemask</code> can optionally be used to override the default - set of NUMA nodes where the memory would be allocated. + For model <code>nvdimm</code> this element is mandatory and has a + single child element <code>path</code> that represents a path + in the host that backs the nvdimm module in the guest. </p> </dd> diff --git a/docs/schemas/domaincommon.rng b/docs/schemas/domaincommon.rng index 1860f9075..07b3c52ad 100644 --- a/docs/schemas/domaincommon.rng +++ b/docs/schemas/domaincommon.rng @@ -4746,6 +4746,7 @@ <attribute name="model"> <choice> <value>dimm</value> + <value>nvdimm</value> </choice> </attribute> <interleave> @@ -4765,18 +4766,27 @@ <define name="memorydev-source"> <element name="source"> - <interleave> - <optional> - <element name="pagesize"> - <ref name="scaledInteger"/> + <choice> + <group> + <interleave> + <optional> + <element name="pagesize"> + <ref name="scaledInteger"/> + </element> + </optional> + <optional> + <element name="nodemask"> + <ref name="cpuset"/> + </element> + </optional> + </interleave> + </group> + <group> + <element name="path"> + <ref name="absFilePath"/> </element> - </optional> - <optional> - <element name="nodemask"> - <ref name="cpuset"/> - </element> - </optional> - </interleave> + </group> + </choice> </element> </define> diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c index a58f99762..e23ce0015 100644 --- a/src/conf/domain_conf.c +++ b/src/conf/domain_conf.c @@ -858,8 +858,11 @@ VIR_ENUM_DECL(virDomainBlockJob) VIR_ENUM_IMPL(virDomainBlockJob, VIR_DOMAIN_BLOCK_JOB_TYPE_LAST, "", "", "copy", "", "active-commit") -VIR_ENUM_IMPL(virDomainMemoryModel, VIR_DOMAIN_MEMORY_MODEL_LAST, - "", "dimm") +VIR_ENUM_IMPL(virDomainMemoryModel, + VIR_DOMAIN_MEMORY_MODEL_LAST, + "", + "dimm", + "nvdimm") VIR_ENUM_IMPL(virDomainShmemModel, VIR_DOMAIN_SHMEM_MODEL_LAST, "ivshmem", @@ -2418,6 +2421,7 @@ void virDomainMemoryDefFree(virDomainMemoryDefPtr def) if (!def) return; + VIR_FREE(def->nvdimmPath); virBitmapFree(def->sourceNodes); virDomainDeviceInfoClear(&def->info); VIR_FREE(def); @@ -13769,20 +13773,36 @@ virDomainMemorySourceDefParseXML(xmlNodePtr node, xmlNodePtr save = ctxt->node; ctxt->node = node; - if (virDomainParseMemory("./pagesize", "./pagesize/@unit", ctxt, - &def->pagesize, false, false) < 0) - goto cleanup; - - if ((nodemask = virXPathString("string(./nodemask)", ctxt))) { - if (virBitmapParse(nodemask, &def->sourceNodes, - VIR_DOMAIN_CPUMASK_LEN) < 0) + switch ((virDomainMemoryModel) def->model) { + case VIR_DOMAIN_MEMORY_MODEL_DIMM: + if (virDomainParseMemory("./pagesize", "./pagesize/@unit", ctxt, + &def->pagesize, false, false) < 0) goto cleanup; - if (virBitmapIsAllClear(def->sourceNodes)) { - virReportError(VIR_ERR_CONFIG_UNSUPPORTED, - _("Invalid value of 'nodemask': %s"), nodemask); + if ((nodemask = virXPathString("string(./nodemask)", ctxt))) { + if (virBitmapParse(nodemask, &def->sourceNodes, + VIR_DOMAIN_CPUMASK_LEN) < 0) + goto cleanup; + + if (virBitmapIsAllClear(def->sourceNodes)) { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, + _("Invalid value of 'nodemask': %s"), nodemask); + goto cleanup; + } + } + break; + + case VIR_DOMAIN_MEMORY_MODEL_NVDIMM: + if (!(def->nvdimmPath = virXPathString("string(./path)", ctxt))) { + virReportError(VIR_ERR_XML_DETAIL, "%s", + _("path is required for model nvdimm'")); goto cleanup; } + break; + + case VIR_DOMAIN_MEMORY_MODEL_NONE: + case VIR_DOMAIN_MEMORY_MODEL_LAST: + break; } ret = 0; @@ -15187,12 +15207,25 @@ virDomainMemoryFindByDefInternal(virDomainDefPtr def, tmp->size != mem->size) continue; - /* source stuff -> match with device */ - if (tmp->pagesize != mem->pagesize) - continue; + switch ((virDomainMemoryModel) mem->model) { + case VIR_DOMAIN_MEMORY_MODEL_DIMM: + /* source stuff -> match with device */ + if (tmp->pagesize != mem->pagesize) + continue; - if (!virBitmapEqual(tmp->sourceNodes, mem->sourceNodes)) - continue; + if (!virBitmapEqual(tmp->sourceNodes, mem->sourceNodes)) + continue; + break; + + case VIR_DOMAIN_MEMORY_MODEL_NVDIMM: + if (STRNEQ(tmp->nvdimmPath, mem->nvdimmPath)) + continue; + break; + + case VIR_DOMAIN_MEMORY_MODEL_NONE: + case VIR_DOMAIN_MEMORY_MODEL_LAST: + break; + } break; } @@ -22585,23 +22618,35 @@ virDomainMemorySourceDefFormat(virBufferPtr buf, char *bitmap = NULL; int ret = -1; - if (!def->pagesize && !def->sourceNodes) + if (!def->pagesize && !def->sourceNodes && !def->nvdimmPath) return 0; virBufferAddLit(buf, "<source>\n"); virBufferAdjustIndent(buf, 2); - if (def->sourceNodes) { - if (!(bitmap = virBitmapFormat(def->sourceNodes))) - goto cleanup; + switch ((virDomainMemoryModel) def->model) { + case VIR_DOMAIN_MEMORY_MODEL_DIMM: + if (def->sourceNodes) { + if (!(bitmap = virBitmapFormat(def->sourceNodes))) + goto cleanup; - virBufferAsprintf(buf, "<nodemask>%s</nodemask>\n", bitmap); + virBufferAsprintf(buf, "<nodemask>%s</nodemask>\n", bitmap); + } + + if (def->pagesize) + virBufferAsprintf(buf, "<pagesize unit='KiB'>%llu</pagesize>\n", + def->pagesize); + break; + + case VIR_DOMAIN_MEMORY_MODEL_NVDIMM: + virBufferEscapeString(buf, "<path>%s</path>\n", def->nvdimmPath); + break; + + case VIR_DOMAIN_MEMORY_MODEL_NONE: + case VIR_DOMAIN_MEMORY_MODEL_LAST: + break; } - if (def->pagesize) - virBufferAsprintf(buf, "<pagesize unit='KiB'>%llu</pagesize>\n", - def->pagesize); - virBufferAdjustIndent(buf, -2); virBufferAddLit(buf, "</source>\n"); diff --git a/src/conf/domain_conf.h b/src/conf/domain_conf.h index 1e53cc328..823582253 100644 --- a/src/conf/domain_conf.h +++ b/src/conf/domain_conf.h @@ -1996,6 +1996,7 @@ struct _virDomainRNGDef { typedef enum { VIR_DOMAIN_MEMORY_MODEL_NONE, VIR_DOMAIN_MEMORY_MODEL_DIMM, /* dimm hotpluggable memory device */ + VIR_DOMAIN_MEMORY_MODEL_NVDIMM, /* nvdimm memory device */ VIR_DOMAIN_MEMORY_MODEL_LAST } virDomainMemoryModel; @@ -2004,6 +2005,7 @@ struct _virDomainMemoryDef { /* source */ virBitmapPtr sourceNodes; unsigned long long pagesize; /* kibibytes */ + char *nvdimmPath; /* target */ int model; /* virDomainMemoryModel */ diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c index 0cca99567..07178f839 100644 --- a/src/qemu/qemu_command.c +++ b/src/qemu/qemu_command.c @@ -3416,6 +3416,12 @@ qemuBuildMemoryDeviceStr(virDomainMemoryDefPtr mem) break; + case VIR_DOMAIN_MEMORY_MODEL_NVDIMM: + virReportError(VIR_ERR_NO_SUPPORT, "%s", + _("nvdimm not supported yet")); + return NULL; + break; + case VIR_DOMAIN_MEMORY_MODEL_NONE: case VIR_DOMAIN_MEMORY_MODEL_LAST: break; diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c index 1a42fcf1b..66c0e5911 100644 --- a/src/qemu/qemu_domain.c +++ b/src/qemu/qemu_domain.c @@ -5951,6 +5951,11 @@ qemuDomainDefValidateMemoryHotplugDevice(const virDomainMemoryDef *mem, } break; + case VIR_DOMAIN_MEMORY_MODEL_NVDIMM: + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", + _("nvdimm hotplug not supported yet")); + return -1; + case VIR_DOMAIN_MEMORY_MODEL_NONE: case VIR_DOMAIN_MEMORY_MODEL_LAST: return -1; diff --git a/tests/qemuxml2argvdata/qemuxml2argv-memory-hotplug-nvdimm.xml b/tests/qemuxml2argvdata/qemuxml2argv-memory-hotplug-nvdimm.xml new file mode 100644 index 000000000..1578db453 --- /dev/null +++ b/tests/qemuxml2argvdata/qemuxml2argv-memory-hotplug-nvdimm.xml @@ -0,0 +1,56 @@ +<domain type='qemu'> + <name>QEMUGuest1</name> + <uuid>c7a5fdbd-edaf-9455-926a-d65c16db1809</uuid> + <maxMemory slots='16' unit='KiB'>1099511627776</maxMemory> + <memory unit='KiB'>1267710</memory> + <currentMemory unit='KiB'>1267710</currentMemory> + <vcpu placement='static' cpuset='0-1'>2</vcpu> + <os> + <type arch='i686' machine='pc'>hvm</type> + <boot dev='hd'/> + </os> + <idmap> + <uid start='0' target='1000' count='10'/> + <gid start='0' target='1000' count='10'/> + </idmap> + <cpu> + <topology sockets='2' cores='1' threads='1'/> + <numa> + <cell id='0' cpus='0-1' memory='1048576' unit='KiB'/> + </numa> + </cpu> + <clock offset='utc'/> + <on_poweroff>destroy</on_poweroff> + <on_reboot>restart</on_reboot> + <on_crash>destroy</on_crash> + <devices> + <emulator>/usr/bin/qemu</emulator> + <disk type='block' device='disk'> + <source dev='/dev/HostVG/QEMUGuest1'/> + <target dev='hda' bus='ide'/> + <address type='drive' controller='0' bus='0' target='0' unit='0'/> + </disk> + <controller type='ide' index='0'> + <address type='pci' domain='0x0000' bus='0x00' slot='0x01' function='0x1'/> + </controller> + <controller type='usb' index='0'> + <address type='pci' domain='0x0000' bus='0x00' slot='0x01' function='0x2'/> + </controller> + <controller type='pci' index='0' model='pci-root'/> + <input type='mouse' bus='ps2'/> + <input type='keyboard' bus='ps2'/> + <memballoon model='virtio'> + <address type='pci' domain='0x0000' bus='0x00' slot='0x03' function='0x0'/> + </memballoon> + <memory model='nvdimm'> + <source> + <path>/tmp/nvdimm</path> + </source> + <target> + <size unit='KiB'>523264</size> + <node>0</node> + </target> + <address type='dimm' slot='0'/> + </memory> + </devices> +</domain> diff --git a/tests/qemuxml2xmloutdata/qemuxml2xmlout-memory-hotplug-nvdimm.xml b/tests/qemuxml2xmloutdata/qemuxml2xmlout-memory-hotplug-nvdimm.xml new file mode 120000 index 000000000..4cac477a9 --- /dev/null +++ b/tests/qemuxml2xmloutdata/qemuxml2xmlout-memory-hotplug-nvdimm.xml @@ -0,0 +1 @@ +../qemuxml2argvdata/qemuxml2argv-memory-hotplug-nvdimm.xml \ No newline at end of file diff --git a/tests/qemuxml2xmltest.c b/tests/qemuxml2xmltest.c index 4353ad245..e1c341dd5 100644 --- a/tests/qemuxml2xmltest.c +++ b/tests/qemuxml2xmltest.c @@ -1078,6 +1078,7 @@ mymain(void) DO_TEST("memory-hotplug", NONE); DO_TEST("memory-hotplug-nonuma", NONE); DO_TEST("memory-hotplug-dimm", NONE); + DO_TEST("memory-hotplug-nvdimm", NONE); DO_TEST("net-udp", NONE); DO_TEST("video-virtio-gpu-device", NONE); -- 2.11.0

On 03/09/2017 11:06 AM, Michal Privoznik wrote:
NVDIMM is new type of memory introduced into QEMU 2.6. The idea is that we have a Non-Volatile memory module that keeps the data persistent across domain reboots.
At the domain XML level, we already have some representation of 'dimm' modules. Long story short, we have <memory/> element that
(From my v2 review): Starting with "Long story short..." how about instead: NVDIMM will utilize the existing <memory/> element that lives under <devices/> by adding a new attribute 'nvdimm' to the existing @model and introduce a new <path/> element for <source/> while reusing other fields. The resulting XML would appear as:
lives under <devices/>. Now, the element even has @model attribute which we can use to introduce new memory type:
<memory model='nvdimm'> <source> <path>/tmp/nvdimm</path> </source> <target> <size unit='KiB'>523264</size> <node>0</node> </target> <address type='dimm' slot='0'/> </memory>
So far, this is just a XML parser/formatter extension. QEMU driver implementation is in the next commit.
Would it be important to indicate that the size is included with the overall domain memory size?
For more info on NVDIMM visit the following web page:
I know this is one of those almost hate to mention it, but should the link be listed in formatdomain? It would be "lost" in a commit message, but then again if the website domain changes, then we have a dead link.
Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- docs/formatdomain.html.in | 56 +++++++++---- docs/schemas/domaincommon.rng | 32 ++++--- src/conf/domain_conf.c | 97 ++++++++++++++++------ src/conf/domain_conf.h | 2 + src/qemu/qemu_command.c | 6 ++ src/qemu/qemu_domain.c | 5 ++ .../qemuxml2argv-memory-hotplug-nvdimm.xml | 56 +++++++++++++ .../qemuxml2xmlout-memory-hotplug-nvdimm.xml | 1 + tests/qemuxml2xmltest.c | 1 + 9 files changed, 205 insertions(+), 51 deletions(-) create mode 100644 tests/qemuxml2argvdata/qemuxml2argv-memory-hotplug-nvdimm.xml create mode 120000 tests/qemuxml2xmloutdata/qemuxml2xmlout-memory-hotplug-nvdimm.xml
ACK whether or not you alter the commit message and/or formatdomain John

Introduce a qemu capability for -device nvdimm. Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/qemu/qemu_capabilities.c | 9 +++++++++ src/qemu/qemu_capabilities.h | 1 + 2 files changed, 10 insertions(+) diff --git a/src/qemu/qemu_capabilities.c b/src/qemu/qemu_capabilities.c index 5a3b4ac50..2f459d128 100644 --- a/src/qemu/qemu_capabilities.c +++ b/src/qemu/qemu_capabilities.c @@ -359,6 +359,7 @@ VIR_ENUM_IMPL(virQEMUCaps, QEMU_CAPS_LAST, "query-cpu-model-expansion", /* 245 */ "virtio-net.host_mtu", "spice-rendernode", + "nvdimm", ); @@ -1626,6 +1627,7 @@ struct virQEMUCapsStringFlags virQEMUCapsObjectTypes[] = { { "ivshmem-plain", QEMU_CAPS_DEVICE_IVSHMEM_PLAIN }, { "ivshmem-doorbell", QEMU_CAPS_DEVICE_IVSHMEM_DOORBELL }, { "vhost-scsi", QEMU_CAPS_DEVICE_VHOST_SCSI }, + { "nvdimm", QEMU_CAPS_DEVICE_NVDIMM }, }; static struct virQEMUCapsStringFlags virQEMUCapsObjectPropsVirtioBalloon[] = { @@ -4545,6 +4547,13 @@ virQEMUCapsInitQMPMonitor(virQEMUCapsPtr qemuCaps, virQEMUCapsProbeQMPGICCapabilities(qemuCaps, mon) < 0) goto cleanup; + /* Prealloc on NVDIMMs is broken on older QEMUs leading to + * user data corruption. If we are dealing with such version + * of QEMU pretend we don't know how to NVDIMM. */ + if (qemuCaps->version < 2009000 && + virQEMUCapsGet(qemuCaps, QEMU_CAPS_DEVICE_NVDIMM)) + virQEMUCapsClear(qemuCaps, QEMU_CAPS_DEVICE_NVDIMM); + ret = 0; cleanup: return ret; diff --git a/src/qemu/qemu_capabilities.h b/src/qemu/qemu_capabilities.h index cc9f46e65..1b4bcfb2f 100644 --- a/src/qemu/qemu_capabilities.h +++ b/src/qemu/qemu_capabilities.h @@ -394,6 +394,7 @@ typedef enum { QEMU_CAPS_QUERY_CPU_MODEL_EXPANSION, /* qmp query-cpu-model-expansion */ QEMU_CAPS_VIRTIO_NET_HOST_MTU, /* virtio-net-*.host_mtu */ QEMU_CAPS_SPICE_RENDERNODE, /* -spice rendernode */ + QEMU_CAPS_DEVICE_NVDIMM, /* -device nvdimm */ QEMU_CAPS_LAST /* this must always be the last item */ } virQEMUCapsFlags; -- 2.11.0

On 03/09/2017 11:06 AM, Michal Privoznik wrote:
Introduce a qemu capability for -device nvdimm.
Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/qemu/qemu_capabilities.c | 9 +++++++++ src/qemu/qemu_capabilities.h | 1 + 2 files changed, 10 insertions(+)
ACK, John

So, majority of the code is just ready as-is. Well, with one slight change: differentiate between dimm and nvdimm in places like device alias generation, generating the command line and so on. Speaking of the command line, we also need to append 'nvdimm=on' to the '-machine' argument so that the nvdimm feature is advertised in the ACPI tables properly. Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/qemu/qemu_alias.c | 10 ++- src/qemu/qemu_command.c | 76 +++++++++++++++------- src/qemu/qemu_domain.c | 42 ++++++++---- .../qemuxml2argv-memory-hotplug-nvdimm.args | 26 ++++++++ tests/qemuxml2argvtest.c | 4 +- 5 files changed, 121 insertions(+), 37 deletions(-) create mode 100644 tests/qemuxml2argvdata/qemuxml2argv-memory-hotplug-nvdimm.args diff --git a/src/qemu/qemu_alias.c b/src/qemu/qemu_alias.c index 4cccf231e..05e1293ef 100644 --- a/src/qemu/qemu_alias.c +++ b/src/qemu/qemu_alias.c @@ -352,17 +352,23 @@ qemuAssignDeviceMemoryAlias(virDomainDefPtr def, size_t i; int maxidx = 0; int idx; + const char *prefix; + + if (mem->model == VIR_DOMAIN_MEMORY_MODEL_DIMM) + prefix = "dimm"; + else + prefix = "nvdimm"; if (oldAlias) { for (i = 0; i < def->nmems; i++) { - if ((idx = qemuDomainDeviceAliasIndex(&def->mems[i]->info, "dimm")) >= maxidx) + if ((idx = qemuDomainDeviceAliasIndex(&def->mems[i]->info, prefix)) >= maxidx) maxidx = idx + 1; } } else { maxidx = mem->info.addr.dimm.slot; } - if (virAsprintf(&mem->info.alias, "dimm%d", maxidx) < 0) + if (virAsprintf(&mem->info.alias, "%s%d", prefix, maxidx) < 0) return -1; return 0; diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c index 07178f839..a66ce6645 100644 --- a/src/qemu/qemu_command.c +++ b/src/qemu/qemu_command.c @@ -3118,7 +3118,8 @@ qemuBuildMemoryBackendStr(virJSONValuePtr *backendProps, const long system_page_size = virGetSystemPageSizeKB(); virDomainMemoryAccess memAccess = VIR_DOMAIN_MEMORY_ACCESS_DEFAULT; size_t i; - char *mem_path = NULL; + char *memPath = NULL; + bool prealloc = false; virBitmapPtr nodemask = NULL; int ret = -1; virJSONValuePtr props = NULL; @@ -3199,26 +3200,31 @@ qemuBuildMemoryBackendStr(virJSONValuePtr *backendProps, if (!(props = virJSONValueNewObject())) return -1; - if (pagesize || def->mem.source == VIR_DOMAIN_MEMORY_SOURCE_FILE) { + if (pagesize || mem->nvdimmPath || + def->mem.source == VIR_DOMAIN_MEMORY_SOURCE_FILE) { *backendType = "memory-backend-file"; - if (def->mem.source == VIR_DOMAIN_MEMORY_SOURCE_FILE) { - /* we can have both pagesize and mem source, then check mem source first */ - if (virJSONValueObjectAdd(props, - "s:mem-path", cfg->memoryBackingDir, - NULL) < 0) + if (mem->nvdimmPath) { + if (VIR_STRDUP(memPath, mem->nvdimmPath) < 0) + goto cleanup; + prealloc = true; + } else if (def->mem.source == VIR_DOMAIN_MEMORY_SOURCE_FILE) { + /* We can have both pagesize and mem source, + * then check mem source first. */ + if (VIR_STRDUP(memPath, cfg->memoryBackingDir) < 0) goto cleanup; } else { - if (qemuGetDomainHupageMemPath(def, cfg, pagesize, &mem_path) < 0) - goto cleanup; - - if (virJSONValueObjectAdd(props, - "b:prealloc", true, - "s:mem-path", mem_path, - NULL) < 0) + if (qemuGetDomainHupageMemPath(def, cfg, pagesize, &memPath) < 0) goto cleanup; + prealloc = true; } + if (virJSONValueObjectAdd(props, + "B:prealloc", prealloc, + "s:mem-path", memPath, + NULL) < 0) + goto cleanup; + switch (memAccess) { case VIR_DOMAIN_MEMORY_ACCESS_SHARED: if (virJSONValueObjectAdd(props, "b:share", true, NULL) < 0) @@ -3274,6 +3280,7 @@ qemuBuildMemoryBackendStr(virJSONValuePtr *backendProps, /* If none of the following is requested... */ if (!needHugepage && !mem->sourceNodes && !nodeSpecified && + !mem->nvdimmPath && memAccess == VIR_DOMAIN_MEMORY_ACCESS_DEFAULT && def->mem.source != VIR_DOMAIN_MEMORY_SOURCE_FILE && !force) { /* report back that using the new backend is not necessary @@ -3303,8 +3310,7 @@ qemuBuildMemoryBackendStr(virJSONValuePtr *backendProps, cleanup: virJSONValueFree(props); - VIR_FREE(mem_path); - + VIR_FREE(memPath); return ret; } @@ -3391,6 +3397,7 @@ char * qemuBuildMemoryDeviceStr(virDomainMemoryDefPtr mem) { virBuffer buf = VIR_BUFFER_INITIALIZER; + const char *device; if (!mem->info.alias) { virReportError(VIR_ERR_INTERNAL_ERROR, "%s", @@ -3399,8 +3406,15 @@ qemuBuildMemoryDeviceStr(virDomainMemoryDefPtr mem) } switch ((virDomainMemoryModel) mem->model) { + case VIR_DOMAIN_MEMORY_MODEL_NVDIMM: case VIR_DOMAIN_MEMORY_MODEL_DIMM: - virBufferAddLit(&buf, "pc-dimm,"); + + if (mem->model == VIR_DOMAIN_MEMORY_MODEL_DIMM) + device = "pc-dimm"; + else + device = "nvdimm"; + + virBufferAsprintf(&buf, "%s,", device); if (mem->targetNode >= 0) virBufferAsprintf(&buf, "node=%d,", mem->targetNode); @@ -3416,12 +3430,6 @@ qemuBuildMemoryDeviceStr(virDomainMemoryDefPtr mem) break; - case VIR_DOMAIN_MEMORY_MODEL_NVDIMM: - virReportError(VIR_ERR_NO_SUPPORT, "%s", - _("nvdimm not supported yet")); - return NULL; - break; - case VIR_DOMAIN_MEMORY_MODEL_NONE: case VIR_DOMAIN_MEMORY_MODEL_LAST: break; @@ -6972,6 +6980,7 @@ qemuBuildMachineCommandLine(virCommandPtr cmd, { virBuffer buf = VIR_BUFFER_INITIALIZER; bool obsoleteAccel = false; + size_t i; int ret = -1; /* This should *never* be NULL, since we always provide @@ -7008,6 +7017,15 @@ qemuBuildMachineCommandLine(virCommandPtr cmd, "with this QEMU binary")); return -1; } + + for (i = 0; i < def->nmems; i++) { + if (def->mems[i]->model == VIR_DOMAIN_MEMORY_MODEL_NVDIMM) { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", + _("nvdimm not is not available " + "with this QEMU binary")); + return -1; + } + } } else { virTristateSwitch vmport = def->features[VIR_DOMAIN_FEATURE_VMPORT]; virTristateSwitch smm = def->features[VIR_DOMAIN_FEATURE_SMM]; @@ -7128,6 +7146,18 @@ qemuBuildMachineCommandLine(virCommandPtr cmd, } } + for (i = 0; i < def->nmems; i++) { + if (def->mems[i]->model == VIR_DOMAIN_MEMORY_MODEL_NVDIMM) { + if (!virQEMUCapsGet(qemuCaps, QEMU_CAPS_DEVICE_NVDIMM)) { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", + _("nvdimm isn't supported by this QEMU binary")); + goto cleanup; + } + virBufferAddLit(&buf, ",nvdimm=on"); + break; + } + } + virCommandAddArgBuffer(cmd, &buf); } diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c index 66c0e5911..495242981 100644 --- a/src/qemu/qemu_domain.c +++ b/src/qemu/qemu_domain.c @@ -5919,6 +5919,7 @@ qemuDomainDefValidateMemoryHotplugDevice(const virDomainMemoryDef *mem, { switch ((virDomainMemoryModel) mem->model) { case VIR_DOMAIN_MEMORY_MODEL_DIMM: + case VIR_DOMAIN_MEMORY_MODEL_NVDIMM: if (mem->info.type != VIR_DOMAIN_DEVICE_ADDRESS_TYPE_DIMM && mem->info.type != VIR_DOMAIN_DEVICE_ADDRESS_TYPE_NONE) { virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", @@ -5951,11 +5952,6 @@ qemuDomainDefValidateMemoryHotplugDevice(const virDomainMemoryDef *mem, } break; - case VIR_DOMAIN_MEMORY_MODEL_NVDIMM: - virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", - _("nvdimm hotplug not supported yet")); - return -1; - case VIR_DOMAIN_MEMORY_MODEL_NONE: case VIR_DOMAIN_MEMORY_MODEL_LAST: return -1; @@ -5986,6 +5982,8 @@ qemuDomainDefValidateMemoryHotplug(const virDomainDef *def, unsigned int nmems = def->nmems; unsigned long long hotplugSpace; unsigned long long hotplugMemory = 0; + bool needPCDimmCap = false; + bool needNvdimmCap = false; size_t i; hotplugSpace = def->mem.max_memory - virDomainDefGetMemoryInitial(def); @@ -6009,12 +6007,6 @@ qemuDomainDefValidateMemoryHotplug(const virDomainDef *def, return 0; } - if (!virQEMUCapsGet(qemuCaps, QEMU_CAPS_DEVICE_PC_DIMM)) { - virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", - _("memory hotplug isn't supported by this QEMU binary")); - return -1; - } - if (!ARCH_IS_PPC64(def->os.arch)) { /* due to guest support, qemu would silently enable NUMA with one node * once the memory hotplug backend is enabled. To avoid possible @@ -6038,6 +6030,34 @@ qemuDomainDefValidateMemoryHotplug(const virDomainDef *def, for (i = 0; i < def->nmems; i++) { hotplugMemory += def->mems[i]->size; + switch ((virDomainMemoryModel) def->mems[i]->model) { + case VIR_DOMAIN_MEMORY_MODEL_DIMM: + needPCDimmCap = true; + break; + + case VIR_DOMAIN_MEMORY_MODEL_NVDIMM: + needNvdimmCap = true; + break; + + case VIR_DOMAIN_MEMORY_MODEL_NONE: + case VIR_DOMAIN_MEMORY_MODEL_LAST: + break; + } + + if (needPCDimmCap && + !virQEMUCapsGet(qemuCaps, QEMU_CAPS_DEVICE_PC_DIMM)) { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", + _("memory hotplug isn't supported by this QEMU binary")); + return -1; + } + + if (needNvdimmCap && + !virQEMUCapsGet(qemuCaps, QEMU_CAPS_DEVICE_NVDIMM)) { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", + _("nvdimm isn't supported by this QEMU binary")); + return -1; + } + /* already existing devices don't need to be checked on hotplug */ if (!mem && qemuDomainDefValidateMemoryHotplugDevice(def->mems[i], def) < 0) diff --git a/tests/qemuxml2argvdata/qemuxml2argv-memory-hotplug-nvdimm.args b/tests/qemuxml2argvdata/qemuxml2argv-memory-hotplug-nvdimm.args new file mode 100644 index 000000000..907bcbeda --- /dev/null +++ b/tests/qemuxml2argvdata/qemuxml2argv-memory-hotplug-nvdimm.args @@ -0,0 +1,26 @@ +LC_ALL=C \ +PATH=/bin \ +HOME=/home/test \ +USER=test \ +LOGNAME=test \ +QEMU_AUDIO_DRV=none \ +/usr/bin/qemu \ +-name QEMUGuest1 \ +-S \ +-machine pc,accel=tcg,nvdimm=on \ +-m size=1048576k,slots=16,maxmem=1099511627776k \ +-smp 2,sockets=2,cores=1,threads=1 \ +-numa node,nodeid=0,cpus=0-1,mem=1024 \ +-object memory-backend-file,id=memnvdimm0,prealloc=yes,mem-path=/tmp/nvdimm,\ +size=536870912 \ +-device nvdimm,node=0,memdev=memnvdimm0,id=nvdimm0,slot=0 \ +-uuid c7a5fdbd-edaf-9455-926a-d65c16db1809 \ +-nographic \ +-nodefaults \ +-monitor unix:/tmp/lib/domain--1-QEMUGuest1/monitor.sock,server,nowait \ +-no-acpi \ +-boot c \ +-usb \ +-drive file=/dev/HostVG/QEMUGuest1,format=raw,if=none,id=drive-ide0-0-0 \ +-device ide-drive,bus=ide.0,unit=0,drive=drive-ide0-0-0,id=ide0-0-0 \ +-device virtio-balloon-pci,id=balloon0,bus=pci.0,addr=0x3 diff --git a/tests/qemuxml2argvtest.c b/tests/qemuxml2argvtest.c index d2d267fce..b6fdaaeb3 100644 --- a/tests/qemuxml2argvtest.c +++ b/tests/qemuxml2argvtest.c @@ -2329,7 +2329,7 @@ mymain(void) DO_TEST_FAILURE("memory-align-fail", NONE); DO_TEST_FAILURE("memory-hotplug-nonuma", QEMU_CAPS_DEVICE_PC_DIMM); - DO_TEST_FAILURE("memory-hotplug", NONE); + DO_TEST("memory-hotplug", NONE); DO_TEST("memory-hotplug", QEMU_CAPS_DEVICE_PC_DIMM, QEMU_CAPS_NUMA); DO_TEST("memory-hotplug-dimm", QEMU_CAPS_DEVICE_PC_DIMM, QEMU_CAPS_NUMA, QEMU_CAPS_OBJECT_MEMORY_RAM, QEMU_CAPS_OBJECT_MEMORY_FILE); @@ -2337,6 +2337,8 @@ mymain(void) QEMU_CAPS_OBJECT_MEMORY_RAM, QEMU_CAPS_OBJECT_MEMORY_FILE); DO_TEST("memory-hotplug-ppc64-nonuma", QEMU_CAPS_KVM, QEMU_CAPS_DEVICE_PC_DIMM, QEMU_CAPS_NUMA, QEMU_CAPS_OBJECT_MEMORY_RAM, QEMU_CAPS_OBJECT_MEMORY_FILE); + DO_TEST("memory-hotplug-nvdimm", QEMU_CAPS_MACHINE_OPT, QEMU_CAPS_DEVICE_NVDIMM, + QEMU_CAPS_NUMA, QEMU_CAPS_OBJECT_MEMORY_RAM, QEMU_CAPS_OBJECT_MEMORY_FILE); DO_TEST("machine-aeskeywrap-on-caps", QEMU_CAPS_MACHINE_OPT, QEMU_CAPS_AES_KEY_WRAP, -- 2.11.0

On 03/09/2017 11:06 AM, Michal Privoznik wrote:
So, majority of the code is just ready as-is. Well, with one slight change: differentiate between dimm and nvdimm in places like device alias generation, generating the command line and so on.
Speaking of the command line, we also need to append 'nvdimm=on' to the '-machine' argument so that the nvdimm feature is advertised in the ACPI tables properly.
Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/qemu/qemu_alias.c | 10 ++- src/qemu/qemu_command.c | 76 +++++++++++++++------- src/qemu/qemu_domain.c | 42 ++++++++---- .../qemuxml2argv-memory-hotplug-nvdimm.args | 26 ++++++++ tests/qemuxml2argvtest.c | 4 +- 5 files changed, 121 insertions(+), 37 deletions(-) create mode 100644 tests/qemuxml2argvdata/qemuxml2argv-memory-hotplug-nvdimm.args
[...]
diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c index 07178f839..a66ce6645 100644 --- a/src/qemu/qemu_command.c +++ b/src/qemu/qemu_command.c @@ -3118,7 +3118,8 @@ qemuBuildMemoryBackendStr(virJSONValuePtr *backendProps,
const long system_page_size = virGetSystemPageSizeKB(); virDomainMemoryAccess memAccess = VIR_DOMAIN_MEMORY_ACCESS_DEFAULT; size_t i; - char *mem_path = NULL; + char *memPath = NULL; + bool prealloc = false; virBitmapPtr nodemask = NULL; int ret = -1; virJSONValuePtr props = NULL; @@ -3199,26 +3200,31 @@ qemuBuildMemoryBackendStr(virJSONValuePtr *backendProps, if (!(props = virJSONValueNewObject())) return -1;
- if (pagesize || def->mem.source == VIR_DOMAIN_MEMORY_SOURCE_FILE) { + if (pagesize || mem->nvdimmPath || + def->mem.source == VIR_DOMAIN_MEMORY_SOURCE_FILE) { *backendType = "memory-backend-file";
- if (def->mem.source == VIR_DOMAIN_MEMORY_SOURCE_FILE) { - /* we can have both pagesize and mem source, then check mem source first */ - if (virJSONValueObjectAdd(props, - "s:mem-path", cfg->memoryBackingDir, - NULL) < 0) + if (mem->nvdimmPath) { + if (VIR_STRDUP(memPath, mem->nvdimmPath) < 0) + goto cleanup; + prealloc = true; + } else if (def->mem.source == VIR_DOMAIN_MEMORY_SOURCE_FILE) { + /* We can have both pagesize and mem source, + * then check mem source first. */ + if (VIR_STRDUP(memPath, cfg->memoryBackingDir) < 0) goto cleanup; } else { - if (qemuGetDomainHupageMemPath(def, cfg, pagesize, &mem_path) < 0) - goto cleanup; - - if (virJSONValueObjectAdd(props, - "b:prealloc", true, - "s:mem-path", mem_path, - NULL) < 0) + if (qemuGetDomainHupageMemPath(def, cfg, pagesize, &memPath) < 0) goto cleanup; + prealloc = true; }
FWIW: In my v2 review I alluded to a double comma thing (e.g. code that needs virQEMUBuildBufferEscapeComma)... This is what I was thinking about, but IIRC it's only something for certain command line options and not JSON object building...
+ if (virJSONValueObjectAdd(props, + "B:prealloc", prealloc, + "s:mem-path", memPath, + NULL) < 0) + goto cleanup; + switch (memAccess) { case VIR_DOMAIN_MEMORY_ACCESS_SHARED: if (virJSONValueObjectAdd(props, "b:share", true, NULL) < 0) @@ -3274,6 +3280,7 @@ qemuBuildMemoryBackendStr(virJSONValuePtr *backendProps,
/* If none of the following is requested... */ if (!needHugepage && !mem->sourceNodes && !nodeSpecified && + !mem->nvdimmPath && memAccess == VIR_DOMAIN_MEMORY_ACCESS_DEFAULT && def->mem.source != VIR_DOMAIN_MEMORY_SOURCE_FILE && !force) { /* report back that using the new backend is not necessary @@ -3303,8 +3310,7 @@ qemuBuildMemoryBackendStr(virJSONValuePtr *backendProps,
cleanup: virJSONValueFree(props); - VIR_FREE(mem_path); - + VIR_FREE(memPath); return ret; }
[...]
diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c index 66c0e5911..495242981 100644 --- a/src/qemu/qemu_domain.c +++ b/src/qemu/qemu_domain.c @@ -5919,6 +5919,7 @@ qemuDomainDefValidateMemoryHotplugDevice(const virDomainMemoryDef *mem, { switch ((virDomainMemoryModel) mem->model) { case VIR_DOMAIN_MEMORY_MODEL_DIMM: + case VIR_DOMAIN_MEMORY_MODEL_NVDIMM: if (mem->info.type != VIR_DOMAIN_DEVICE_ADDRESS_TYPE_DIMM && mem->info.type != VIR_DOMAIN_DEVICE_ADDRESS_TYPE_NONE) { virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", @@ -5951,11 +5952,6 @@ qemuDomainDefValidateMemoryHotplugDevice(const virDomainMemoryDef *mem, } break;
- case VIR_DOMAIN_MEMORY_MODEL_NVDIMM: - virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", - _("nvdimm hotplug not supported yet")); - return -1; - case VIR_DOMAIN_MEMORY_MODEL_NONE: case VIR_DOMAIN_MEMORY_MODEL_LAST: return -1; @@ -5986,6 +5982,8 @@ qemuDomainDefValidateMemoryHotplug(const virDomainDef *def, unsigned int nmems = def->nmems; unsigned long long hotplugSpace; unsigned long long hotplugMemory = 0; + bool needPCDimmCap = false; + bool needNvdimmCap = false;
needNVDimm could be used.... although I see no reason for these bool's the way the rest is written.
size_t i;
hotplugSpace = def->mem.max_memory - virDomainDefGetMemoryInitial(def); @@ -6009,12 +6007,6 @@ qemuDomainDefValidateMemoryHotplug(const virDomainDef *def, return 0; }
- if (!virQEMUCapsGet(qemuCaps, QEMU_CAPS_DEVICE_PC_DIMM)) { - virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", - _("memory hotplug isn't supported by this QEMU binary")); - return -1; - } - if (!ARCH_IS_PPC64(def->os.arch)) { /* due to guest support, qemu would silently enable NUMA with one node * once the memory hotplug backend is enabled. To avoid possible @@ -6038,6 +6030,34 @@ qemuDomainDefValidateMemoryHotplug(const virDomainDef *def, for (i = 0; i < def->nmems; i++) { hotplugMemory += def->mems[i]->size;
+ switch ((virDomainMemoryModel) def->mems[i]->model) { + case VIR_DOMAIN_MEMORY_MODEL_DIMM: + needPCDimmCap = true; + break; + + case VIR_DOMAIN_MEMORY_MODEL_NVDIMM: + needNvdimmCap = true; + break; + + case VIR_DOMAIN_MEMORY_MODEL_NONE: + case VIR_DOMAIN_MEMORY_MODEL_LAST: + break; + } + + if (needPCDimmCap && + !virQEMUCapsGet(qemuCaps, QEMU_CAPS_DEVICE_PC_DIMM)) { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", + _("memory hotplug isn't supported by this QEMU binary")); + return -1; + } + + if (needNvdimmCap && + !virQEMUCapsGet(qemuCaps, QEMU_CAPS_DEVICE_NVDIMM)) { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", + _("nvdimm isn't supported by this QEMU binary")); + return -1; + } +
Still inefficient as virQEMUCapsGet will get called each time through the for loop as soon as need*Cap = true... Perhaps even moreso since one or the other will be called both times for a single pass if there both types of *Dimm defs are in the XML (once one or the other is seen). I think the bool's should be turned into 'checkedPCDimmCap' and 'checkedNVDimmCap' though and that would be less inefficient. e.g. within the case: case VIR_DOMAIN_MEMORY_MODEL_DIMM: if (!checkedPCDimmCap) { if (!virQEMUCapsGet()) failure checkedPCDimmCap = true; }
/* already existing devices don't need to be checked on hotplug */ if (!mem && qemuDomainDefValidateMemoryHotplugDevice(def->mems[i], def) < 0)
ACK - I think it would be good to not make a CapsGet on every pass through though John

On 03/14/2017 02:36 PM, John Ferlan wrote:
On 03/09/2017 11:06 AM, Michal Privoznik wrote:
So, majority of the code is just ready as-is. Well, with one slight change: differentiate between dimm and nvdimm in places like device alias generation, generating the command line and so on.
Speaking of the command line, we also need to append 'nvdimm=on' to the '-machine' argument so that the nvdimm feature is advertised in the ACPI tables properly.
Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/qemu/qemu_alias.c | 10 ++- src/qemu/qemu_command.c | 76 +++++++++++++++------- src/qemu/qemu_domain.c | 42 ++++++++---- .../qemuxml2argv-memory-hotplug-nvdimm.args | 26 ++++++++ tests/qemuxml2argvtest.c | 4 +- 5 files changed, 121 insertions(+), 37 deletions(-) create mode 100644 tests/qemuxml2argvdata/qemuxml2argv-memory-hotplug-nvdimm.args
[...]
diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c index 07178f839..a66ce6645 100644 --- a/src/qemu/qemu_command.c +++ b/src/qemu/qemu_command.c @@ -3118,7 +3118,8 @@ qemuBuildMemoryBackendStr(virJSONValuePtr *backendProps,
const long system_page_size = virGetSystemPageSizeKB(); virDomainMemoryAccess memAccess = VIR_DOMAIN_MEMORY_ACCESS_DEFAULT; size_t i; - char *mem_path = NULL; + char *memPath = NULL; + bool prealloc = false; virBitmapPtr nodemask = NULL; int ret = -1; virJSONValuePtr props = NULL; @@ -3199,26 +3200,31 @@ qemuBuildMemoryBackendStr(virJSONValuePtr *backendProps, if (!(props = virJSONValueNewObject())) return -1;
- if (pagesize || def->mem.source == VIR_DOMAIN_MEMORY_SOURCE_FILE) { + if (pagesize || mem->nvdimmPath || + def->mem.source == VIR_DOMAIN_MEMORY_SOURCE_FILE) { *backendType = "memory-backend-file";
- if (def->mem.source == VIR_DOMAIN_MEMORY_SOURCE_FILE) { - /* we can have both pagesize and mem source, then check mem source first */ - if (virJSONValueObjectAdd(props, - "s:mem-path", cfg->memoryBackingDir, - NULL) < 0) + if (mem->nvdimmPath) { + if (VIR_STRDUP(memPath, mem->nvdimmPath) < 0) + goto cleanup; + prealloc = true; + } else if (def->mem.source == VIR_DOMAIN_MEMORY_SOURCE_FILE) { + /* We can have both pagesize and mem source, + * then check mem source first. */ + if (VIR_STRDUP(memPath, cfg->memoryBackingDir) < 0) goto cleanup; } else { - if (qemuGetDomainHupageMemPath(def, cfg, pagesize, &mem_path) < 0) - goto cleanup; - - if (virJSONValueObjectAdd(props, - "b:prealloc", true, - "s:mem-path", mem_path, - NULL) < 0) + if (qemuGetDomainHupageMemPath(def, cfg, pagesize, &memPath) < 0) goto cleanup; + prealloc = true; }
FWIW: In my v2 review I alluded to a double comma thing (e.g. code that needs virQEMUBuildBufferEscapeComma)... This is what I was thinking about, but IIRC it's only something for certain command line options and not JSON object building...
It is not needed for JSON build.
+ if (virJSONValueObjectAdd(props, + "B:prealloc", prealloc, + "s:mem-path", memPath, + NULL) < 0) + goto cleanup; + switch (memAccess) { case VIR_DOMAIN_MEMORY_ACCESS_SHARED: if (virJSONValueObjectAdd(props, "b:share", true, NULL) < 0) @@ -3274,6 +3280,7 @@ qemuBuildMemoryBackendStr(virJSONValuePtr *backendProps,
/* If none of the following is requested... */ if (!needHugepage && !mem->sourceNodes && !nodeSpecified && + !mem->nvdimmPath && memAccess == VIR_DOMAIN_MEMORY_ACCESS_DEFAULT && def->mem.source != VIR_DOMAIN_MEMORY_SOURCE_FILE && !force) { /* report back that using the new backend is not necessary @@ -3303,8 +3310,7 @@ qemuBuildMemoryBackendStr(virJSONValuePtr *backendProps,
cleanup: virJSONValueFree(props); - VIR_FREE(mem_path); - + VIR_FREE(memPath); return ret; }
[...]
diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c index 66c0e5911..495242981 100644 --- a/src/qemu/qemu_domain.c +++ b/src/qemu/qemu_domain.c @@ -5919,6 +5919,7 @@ qemuDomainDefValidateMemoryHotplugDevice(const virDomainMemoryDef *mem, { switch ((virDomainMemoryModel) mem->model) { case VIR_DOMAIN_MEMORY_MODEL_DIMM: + case VIR_DOMAIN_MEMORY_MODEL_NVDIMM: if (mem->info.type != VIR_DOMAIN_DEVICE_ADDRESS_TYPE_DIMM && mem->info.type != VIR_DOMAIN_DEVICE_ADDRESS_TYPE_NONE) { virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", @@ -5951,11 +5952,6 @@ qemuDomainDefValidateMemoryHotplugDevice(const virDomainMemoryDef *mem, } break;
- case VIR_DOMAIN_MEMORY_MODEL_NVDIMM: - virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", - _("nvdimm hotplug not supported yet")); - return -1; - case VIR_DOMAIN_MEMORY_MODEL_NONE: case VIR_DOMAIN_MEMORY_MODEL_LAST: return -1; @@ -5986,6 +5982,8 @@ qemuDomainDefValidateMemoryHotplug(const virDomainDef *def, unsigned int nmems = def->nmems; unsigned long long hotplugSpace; unsigned long long hotplugMemory = 0; + bool needPCDimmCap = false; + bool needNvdimmCap = false;
needNVDimm could be used.... although I see no reason for these bool's the way the rest is written.
size_t i;
hotplugSpace = def->mem.max_memory - virDomainDefGetMemoryInitial(def); @@ -6009,12 +6007,6 @@ qemuDomainDefValidateMemoryHotplug(const virDomainDef *def, return 0; }
- if (!virQEMUCapsGet(qemuCaps, QEMU_CAPS_DEVICE_PC_DIMM)) { - virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", - _("memory hotplug isn't supported by this QEMU binary")); - return -1; - } - if (!ARCH_IS_PPC64(def->os.arch)) { /* due to guest support, qemu would silently enable NUMA with one node * once the memory hotplug backend is enabled. To avoid possible @@ -6038,6 +6030,34 @@ qemuDomainDefValidateMemoryHotplug(const virDomainDef *def, for (i = 0; i < def->nmems; i++) { hotplugMemory += def->mems[i]->size;
+ switch ((virDomainMemoryModel) def->mems[i]->model) { + case VIR_DOMAIN_MEMORY_MODEL_DIMM: + needPCDimmCap = true; + break; + + case VIR_DOMAIN_MEMORY_MODEL_NVDIMM: + needNvdimmCap = true; + break; + + case VIR_DOMAIN_MEMORY_MODEL_NONE: + case VIR_DOMAIN_MEMORY_MODEL_LAST: + break; + } + + if (needPCDimmCap && + !virQEMUCapsGet(qemuCaps, QEMU_CAPS_DEVICE_PC_DIMM)) { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", + _("memory hotplug isn't supported by this QEMU binary")); + return -1; + } + + if (needNvdimmCap && + !virQEMUCapsGet(qemuCaps, QEMU_CAPS_DEVICE_NVDIMM)) { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", + _("nvdimm isn't supported by this QEMU binary")); + return -1; + } +
Still inefficient as virQEMUCapsGet will get called each time through the for loop as soon as need*Cap = true... Perhaps even moreso since one or the other will be called both times for a single pass if there both types of *Dimm defs are in the XML (once one or the other is seen).
Oh, I am a giddy goat. This should have been: for () { switch() { case MODEL_DIM: needPCDimmCap = true; break; case MODEL_NVDIMM: needNvdimmCap = true; break; } } if (needPCDimmCap && !virQEMUCapsGet()) error; if (needNvdimmCap && !virQEMUCapsGet()) error; I am fixing it as such. Thanks. Michal

Now that NVDIMM has found its way into libvirt, users might want to fine tune some settings for each module separately. One such setting is 'share=on|off' for the memory-backend-file object. This setting - just like its name suggest already - enables sharing the nvdimm module with other applications. Under the hood it controls whether qemu mmaps() the file as MAP_PRIVATE or MAP_SHARED. Yet again, we have such config knob in domain XML, but it's just an attribute to numa <cell/>. This does not give fine enough tuning on per-memdevice basis so we need to have the attribute for each device too. Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- docs/formatdomain.html.in | 16 ++++++- docs/schemas/domaincommon.rng | 8 ++++ src/conf/domain_conf.c | 15 +++++- src/conf/domain_conf.h | 2 + .../qemuxml2argv-memory-hotplug-nvdimm-access.xml | 56 ++++++++++++++++++++++ ...qemuxml2xmlout-memory-hotplug-nvdimm-access.xml | 1 + tests/qemuxml2xmltest.c | 1 + 7 files changed, 96 insertions(+), 3 deletions(-) create mode 100644 tests/qemuxml2argvdata/qemuxml2argv-memory-hotplug-nvdimm-access.xml create mode 120000 tests/qemuxml2xmloutdata/qemuxml2xmlout-memory-hotplug-nvdimm-access.xml diff --git a/docs/formatdomain.html.in b/docs/formatdomain.html.in index 6e89bfe3a..4bc3d92f9 100644 --- a/docs/formatdomain.html.in +++ b/docs/formatdomain.html.in @@ -1417,7 +1417,7 @@ <span class='since'>Since 1.2.9</span> the optional attribute <code>memAccess</code> can control whether the memory is to be mapped as "shared" or "private". This is valid only for - hugepages-backed memory. + hugepages-backed memory and nvdimm modules. </p> <p> @@ -7099,7 +7099,7 @@ qemu-kvm -net nic,model=? /dev/null <pre> ... <devices> - <memory model='dimm'> + <memory model='dimm' access='private'> <target> <size unit='KiB'>524287</size> <node>0</node> @@ -7138,6 +7138,18 @@ qemu-kvm -net nic,model=? /dev/null </p> </dd> + <dt><code>access</code></dt> + <dd> + <p> + An optional attribute <code>access</code> + (<span class="since">since 3.2.0</span>) that provides + capability to fine tune mapping of the memory on per + module basis. Values are the same as + <a href="#elementsMemoryBacking">Memory Backing</a>: + <code>shared</code> and <code>private</code>. + </p> + </dd> + <dt><code>source</code></dt> <dd> <p> diff --git a/docs/schemas/domaincommon.rng b/docs/schemas/domaincommon.rng index 07b3c52ad..5e7e75950 100644 --- a/docs/schemas/domaincommon.rng +++ b/docs/schemas/domaincommon.rng @@ -4749,6 +4749,14 @@ <value>nvdimm</value> </choice> </attribute> + <optional> + <attribute name="access"> + <choice> + <value>shared</value> + <value>private</value> + </choice> + </attribute> + </optional> <interleave> <optional> <ref name="memorydev-source"/> diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c index e23ce0015..bf3f2fb86 100644 --- a/src/conf/domain_conf.c +++ b/src/conf/domain_conf.c @@ -13874,6 +13874,15 @@ virDomainMemoryDefParseXML(xmlNodePtr memdevNode, } VIR_FREE(tmp); + tmp = virXMLPropString(memdevNode, "access"); + if (tmp && + (def->access = virDomainMemoryAccessTypeFromString(tmp)) <= 0) { + virReportError(VIR_ERR_XML_ERROR, + _("invalid access mode '%s'"), tmp); + goto error; + } + VIR_FREE(tmp); + /* source */ if ((node = virXPathNode("./source", ctxt)) && virDomainMemorySourceDefParseXML(node, ctxt, def) < 0) @@ -22680,7 +22689,11 @@ virDomainMemoryDefFormat(virBufferPtr buf, { const char *model = virDomainMemoryModelTypeToString(def->model); - virBufferAsprintf(buf, "<memory model='%s'>\n", model); + virBufferAsprintf(buf, "<memory model='%s'", model); + if (def->access) + virBufferAsprintf(buf, " access='%s'", + virDomainMemoryAccessTypeToString(def->access)); + virBufferAddLit(buf, ">\n"); virBufferAdjustIndent(buf, 2); if (virDomainMemorySourceDefFormat(buf, def) < 0) diff --git a/src/conf/domain_conf.h b/src/conf/domain_conf.h index 823582253..fa27708ce 100644 --- a/src/conf/domain_conf.h +++ b/src/conf/domain_conf.h @@ -2002,6 +2002,8 @@ typedef enum { } virDomainMemoryModel; struct _virDomainMemoryDef { + virDomainMemoryAccess access; + /* source */ virBitmapPtr sourceNodes; unsigned long long pagesize; /* kibibytes */ diff --git a/tests/qemuxml2argvdata/qemuxml2argv-memory-hotplug-nvdimm-access.xml b/tests/qemuxml2argvdata/qemuxml2argv-memory-hotplug-nvdimm-access.xml new file mode 100644 index 000000000..700e961a6 --- /dev/null +++ b/tests/qemuxml2argvdata/qemuxml2argv-memory-hotplug-nvdimm-access.xml @@ -0,0 +1,56 @@ +<domain type='qemu'> + <name>QEMUGuest1</name> + <uuid>c7a5fdbd-edaf-9455-926a-d65c16db1809</uuid> + <maxMemory slots='16' unit='KiB'>1099511627776</maxMemory> + <memory unit='KiB'>1267710</memory> + <currentMemory unit='KiB'>1267710</currentMemory> + <vcpu placement='static' cpuset='0-1'>2</vcpu> + <os> + <type arch='i686' machine='pc'>hvm</type> + <boot dev='hd'/> + </os> + <idmap> + <uid start='0' target='1000' count='10'/> + <gid start='0' target='1000' count='10'/> + </idmap> + <cpu> + <topology sockets='2' cores='1' threads='1'/> + <numa> + <cell id='0' cpus='0-1' memory='219136' unit='KiB'/> + </numa> + </cpu> + <clock offset='utc'/> + <on_poweroff>destroy</on_poweroff> + <on_reboot>restart</on_reboot> + <on_crash>destroy</on_crash> + <devices> + <emulator>/usr/bin/qemu</emulator> + <disk type='block' device='disk'> + <source dev='/dev/HostVG/QEMUGuest1'/> + <target dev='hda' bus='ide'/> + <address type='drive' controller='0' bus='0' target='0' unit='0'/> + </disk> + <controller type='ide' index='0'> + <address type='pci' domain='0x0000' bus='0x00' slot='0x01' function='0x1'/> + </controller> + <controller type='usb' index='0'> + <address type='pci' domain='0x0000' bus='0x00' slot='0x01' function='0x2'/> + </controller> + <controller type='pci' index='0' model='pci-root'/> + <input type='mouse' bus='ps2'/> + <input type='keyboard' bus='ps2'/> + <memballoon model='virtio'> + <address type='pci' domain='0x0000' bus='0x00' slot='0x03' function='0x0'/> + </memballoon> + <memory model='nvdimm' access='private'> + <source> + <path>/tmp/nvdimm</path> + </source> + <target> + <size unit='KiB'>523264</size> + <node>0</node> + </target> + <address type='dimm' slot='0'/> + </memory> + </devices> +</domain> diff --git a/tests/qemuxml2xmloutdata/qemuxml2xmlout-memory-hotplug-nvdimm-access.xml b/tests/qemuxml2xmloutdata/qemuxml2xmlout-memory-hotplug-nvdimm-access.xml new file mode 120000 index 000000000..4b0496ad5 --- /dev/null +++ b/tests/qemuxml2xmloutdata/qemuxml2xmlout-memory-hotplug-nvdimm-access.xml @@ -0,0 +1 @@ +/home/zippy/work/libvirt/libvirt.git/tests/qemuxml2argvdata/qemuxml2argv-memory-hotplug-nvdimm-access.xml \ No newline at end of file diff --git a/tests/qemuxml2xmltest.c b/tests/qemuxml2xmltest.c index e1c341dd5..ef49a79ca 100644 --- a/tests/qemuxml2xmltest.c +++ b/tests/qemuxml2xmltest.c @@ -1079,6 +1079,7 @@ mymain(void) DO_TEST("memory-hotplug-nonuma", NONE); DO_TEST("memory-hotplug-dimm", NONE); DO_TEST("memory-hotplug-nvdimm", NONE); + DO_TEST("memory-hotplug-nvdimm-access", NONE); DO_TEST("net-udp", NONE); DO_TEST("video-virtio-gpu-device", NONE); -- 2.11.0

On 03/09/2017 11:06 AM, Michal Privoznik wrote:
Now that NVDIMM has found its way into libvirt, users might want to fine tune some settings for each module separately. One such setting is 'share=on|off' for the memory-backend-file object. This setting - just like its name suggest already - enables sharing the nvdimm module with other applications. Under the hood it controls whether qemu mmaps() the file as MAP_PRIVATE or MAP_SHARED.
Yet again, we have such config knob in domain XML, but it's just an attribute to numa <cell/>. This does not give fine enough tuning on per-memdevice basis so we need to have the attribute for each device too.
Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- docs/formatdomain.html.in | 16 ++++++- docs/schemas/domaincommon.rng | 8 ++++ src/conf/domain_conf.c | 15 +++++- src/conf/domain_conf.h | 2 + .../qemuxml2argv-memory-hotplug-nvdimm-access.xml | 56 ++++++++++++++++++++++ ...qemuxml2xmlout-memory-hotplug-nvdimm-access.xml | 1 + tests/qemuxml2xmltest.c | 1 + 7 files changed, 96 insertions(+), 3 deletions(-) create mode 100644 tests/qemuxml2argvdata/qemuxml2argv-memory-hotplug-nvdimm-access.xml create mode 120000 tests/qemuxml2xmloutdata/qemuxml2xmlout-memory-hotplug-nvdimm-access.xml
This patch fails make check for me because your qemuxml2xmloutdata has the wrong softlink set up. I don't have "/home/zippy/work/...", but I would have e.g. from the tests/qemuxml2xmloutdata directory use: ln -sf ../qemuxml2argvdata/qemuxml2argv-memory-hotplug-nvdimm-access.xml qemuxml2xmlout-memory-hotplug-nvdimm-access.xml
diff --git a/docs/formatdomain.html.in b/docs/formatdomain.html.in index 6e89bfe3a..4bc3d92f9 100644 --- a/docs/formatdomain.html.in +++ b/docs/formatdomain.html.in @@ -1417,7 +1417,7 @@ <span class='since'>Since 1.2.9</span> the optional attribute <code>memAccess</code> can control whether the memory is to be mapped as "shared" or "private". This is valid only for - hugepages-backed memory. + hugepages-backed memory and nvdimm modules. </p>
<p> @@ -7099,7 +7099,7 @@ qemu-kvm -net nic,model=? /dev/null <pre> ... <devices> - <memory model='dimm'> + <memory model='dimm' access='private'> <target> <size unit='KiB'>524287</size> <node>0</node> @@ -7138,6 +7138,18 @@ qemu-kvm -net nic,model=? /dev/null </p> </dd>
+ <dt><code>access</code></dt> + <dd> + <p> + An optional attribute <code>access</code> + (<span class="since">since 3.2.0</span>) that provides + capability to fine tune mapping of the memory on per + module basis. Values are the same as + <a href="#elementsMemoryBacking">Memory Backing</a>: + <code>shared</code> and <code>private</code>. + </p> + </dd> +
From the v2 code review comment/response - can/should we state right here that the access setting in/for nvdimm would override any memoryBacking setting?'
If so, then would it be furthermore interesting to modify the XML here to set the memoryBacking as private and override that, but it's not required... ACK w/ the fixed link to the output file... Your call on the doc change John

Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/qemu/qemu_command.c | 5 +++-- .../qemuxml2argv-memory-hotplug-nvdimm-access.args | 26 ++++++++++++++++++++++ tests/qemuxml2argvtest.c | 2 ++ 3 files changed, 31 insertions(+), 2 deletions(-) create mode 100644 tests/qemuxml2argvdata/qemuxml2argv-memory-hotplug-nvdimm-access.args diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c index a66ce6645..86a52ef62 100644 --- a/src/qemu/qemu_command.c +++ b/src/qemu/qemu_command.c @@ -3116,7 +3116,7 @@ qemuBuildMemoryBackendStr(virJSONValuePtr *backendProps, virDomainHugePagePtr hugepage = NULL; virDomainNumatuneMemMode mode; const long system_page_size = virGetSystemPageSizeKB(); - virDomainMemoryAccess memAccess = VIR_DOMAIN_MEMORY_ACCESS_DEFAULT; + virDomainMemoryAccess memAccess = mem->access; size_t i; char *memPath = NULL; bool prealloc = false; @@ -3130,7 +3130,8 @@ qemuBuildMemoryBackendStr(virJSONValuePtr *backendProps, *backendProps = NULL; *backendType = NULL; - if (mem->targetNode >= 0) { + if (memAccess == VIR_DOMAIN_MEMORY_ACCESS_DEFAULT && + mem->targetNode >= 0) { /* memory devices could provide a invalid guest node */ if (mem->targetNode >= virDomainNumaGetNodeCount(def->numa)) { virReportError(VIR_ERR_CONFIG_UNSUPPORTED, diff --git a/tests/qemuxml2argvdata/qemuxml2argv-memory-hotplug-nvdimm-access.args b/tests/qemuxml2argvdata/qemuxml2argv-memory-hotplug-nvdimm-access.args new file mode 100644 index 000000000..d398b2294 --- /dev/null +++ b/tests/qemuxml2argvdata/qemuxml2argv-memory-hotplug-nvdimm-access.args @@ -0,0 +1,26 @@ +LC_ALL=C \ +PATH=/bin \ +HOME=/home/test \ +USER=test \ +LOGNAME=test \ +QEMU_AUDIO_DRV=none \ +/usr/bin/qemu \ +-name QEMUGuest1 \ +-S \ +-machine pc,accel=tcg,nvdimm=on \ +-m size=219136k,slots=16,maxmem=1099511627776k \ +-smp 2,sockets=2,cores=1,threads=1 \ +-numa node,nodeid=0,cpus=0-1,mem=214 \ +-object memory-backend-file,id=memnvdimm0,prealloc=yes,mem-path=/tmp/nvdimm,\ +share=no,size=536870912 \ +-device nvdimm,node=0,memdev=memnvdimm0,id=nvdimm0,slot=0 \ +-uuid c7a5fdbd-edaf-9455-926a-d65c16db1809 \ +-nographic \ +-nodefaults \ +-monitor unix:/tmp/lib/domain--1-QEMUGuest1/monitor.sock,server,nowait \ +-no-acpi \ +-boot c \ +-usb \ +-drive file=/dev/HostVG/QEMUGuest1,format=raw,if=none,id=drive-ide0-0-0 \ +-device ide-drive,bus=ide.0,unit=0,drive=drive-ide0-0-0,id=ide0-0-0 \ +-device virtio-balloon-pci,id=balloon0,bus=pci.0,addr=0x3 diff --git a/tests/qemuxml2argvtest.c b/tests/qemuxml2argvtest.c index b6fdaaeb3..bf839fe93 100644 --- a/tests/qemuxml2argvtest.c +++ b/tests/qemuxml2argvtest.c @@ -2339,6 +2339,8 @@ mymain(void) QEMU_CAPS_OBJECT_MEMORY_RAM, QEMU_CAPS_OBJECT_MEMORY_FILE); DO_TEST("memory-hotplug-nvdimm", QEMU_CAPS_MACHINE_OPT, QEMU_CAPS_DEVICE_NVDIMM, QEMU_CAPS_NUMA, QEMU_CAPS_OBJECT_MEMORY_RAM, QEMU_CAPS_OBJECT_MEMORY_FILE); + DO_TEST("memory-hotplug-nvdimm-access", QEMU_CAPS_MACHINE_OPT, QEMU_CAPS_DEVICE_NVDIMM, + QEMU_CAPS_NUMA, QEMU_CAPS_OBJECT_MEMORY_RAM, QEMU_CAPS_OBJECT_MEMORY_FILE); DO_TEST("machine-aeskeywrap-on-caps", QEMU_CAPS_MACHINE_OPT, QEMU_CAPS_AES_KEY_WRAP, -- 2.11.0

On 03/09/2017 11:06 AM, Michal Privoznik wrote:
Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/qemu/qemu_command.c | 5 +++-- .../qemuxml2argv-memory-hotplug-nvdimm-access.args | 26 ++++++++++++++++++++++ tests/qemuxml2argvtest.c | 2 ++ 3 files changed, 31 insertions(+), 2 deletions(-) create mode 100644 tests/qemuxml2argvdata/qemuxml2argv-memory-hotplug-nvdimm-access.args
Hmm ... I guess the note I made in previous review would need the amendment regarding whether or not the specific dimm defined an access mode. If it didn't, but was using a numa node, then it would take on the numa node's access; otherwise, if an access is defined, then regardless of what's set for memoryBacking - the access for the nvdimm would be used. ACK for what's here though... I'm just glad I don't have to configure these - it's such a tangled web John

For NVDIMM devices it is optionally possible to specify the size of internal storage for namespaces. Namespaces are a feature that allows users to partition the NVDIMM for different uses. Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- docs/formatdomain.html.in | 11 ++++ docs/schemas/domaincommon.rng | 7 +++ src/conf/domain_conf.c | 34 +++++++++++++ src/conf/domain_conf.h | 1 + src/qemu/qemu_command.c | 3 ++ .../qemuxml2argv-memory-hotplug-nvdimm-label.args | 26 ++++++++++ .../qemuxml2argv-memory-hotplug-nvdimm-label.xml | 59 ++++++++++++++++++++++ tests/qemuxml2argvtest.c | 2 + .../qemuxml2xmlout-memory-hotplug-nvdimm-label.xml | 1 + tests/qemuxml2xmltest.c | 1 + 10 files changed, 145 insertions(+) create mode 100644 tests/qemuxml2argvdata/qemuxml2argv-memory-hotplug-nvdimm-label.args create mode 100644 tests/qemuxml2argvdata/qemuxml2argv-memory-hotplug-nvdimm-label.xml create mode 120000 tests/qemuxml2xmloutdata/qemuxml2xmlout-memory-hotplug-nvdimm-label.xml diff --git a/docs/formatdomain.html.in b/docs/formatdomain.html.in index 4bc3d92f9..bb6df4542 100644 --- a/docs/formatdomain.html.in +++ b/docs/formatdomain.html.in @@ -7122,6 +7122,9 @@ qemu-kvm -net nic,model=? /dev/null <target> <size unit='KiB'>524288</size> <node>1</node> + <label> + <size unit='KiB'>128</size> + </label> </target> </memory> </devices> @@ -7203,6 +7206,14 @@ qemu-kvm -net nic,model=? /dev/null attach the memory to. The element shall be used only if the guest has NUMA nodes configured. </p> + <p> + For NVDIMM type devices one can optionally use + <code>label</code> and its subelement <code>size</code> + to configure the size of namespaces label storage + within the NVDIMM module. The <code>size</code> element + has usual meaning described + <a href="#elementsMemoryAllocation">here</a>. + </p> </dd> </dl> diff --git a/docs/schemas/domaincommon.rng b/docs/schemas/domaincommon.rng index 5e7e75950..88b01ee5a 100644 --- a/docs/schemas/domaincommon.rng +++ b/docs/schemas/domaincommon.rng @@ -4809,6 +4809,13 @@ <ref name="unsignedInt"/> </element> </optional> + <optional> + <element name="label"> + <element name="size"> + <ref name="scaledInteger"/> + </element> + </element> + </optional> </interleave> </element> </define> diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c index bf3f2fb86..0f6ab39d6 100644 --- a/src/conf/domain_conf.c +++ b/src/conf/domain_conf.c @@ -13838,6 +13838,24 @@ virDomainMemoryTargetDefParseXML(xmlNodePtr node, &def->size, true, false) < 0) goto cleanup; + if (def->model == VIR_DOMAIN_MEMORY_MODEL_NVDIMM) { + if (virDomainParseMemory("./label/size", "./label/size/@unit", ctxt, + &def->labelsize, false, false) < 0) + goto cleanup; + + if (def->labelsize && def->labelsize < 128) { + virReportError(VIR_ERR_XML_ERROR, "%s", + _("nvdimm label must be at least 128KiB")); + goto cleanup; + } + + if (def->labelsize >= def->size) { + virReportError(VIR_ERR_XML_ERROR, "%s", + _("label size must be smaller than NVDIMM size")); + goto cleanup; + } + } + ret = 0; cleanup: @@ -19521,6 +19539,15 @@ virDomainMemoryDefCheckABIStability(virDomainMemoryDefPtr src, return false; } + if (src->model == VIR_DOMAIN_MEMORY_MODEL_NVDIMM && + src->labelsize != dst->labelsize) { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, + _("Target NVDIMM label size '%llu' doesn't match " + "source NVDIMM label size '%llu'"), + src->labelsize, dst->labelsize); + return false; + } + return virDomainDeviceInfoCheckABIStability(&src->info, &dst->info); } @@ -22677,6 +22704,13 @@ virDomainMemoryTargetDefFormat(virBufferPtr buf, virBufferAsprintf(buf, "<size unit='KiB'>%llu</size>\n", def->size); if (def->targetNode >= 0) virBufferAsprintf(buf, "<node>%d</node>\n", def->targetNode); + if (def->labelsize) { + virBufferAddLit(buf, "<label>\n"); + virBufferAdjustIndent(buf, 2); + virBufferAsprintf(buf, "<size unit='KiB'>%llu</size>\n", def->labelsize); + virBufferAdjustIndent(buf, -2); + virBufferAddLit(buf, "</label>\n"); + } virBufferAdjustIndent(buf, -2); virBufferAddLit(buf, "</target>\n"); diff --git a/src/conf/domain_conf.h b/src/conf/domain_conf.h index fa27708ce..c66a901c2 100644 --- a/src/conf/domain_conf.h +++ b/src/conf/domain_conf.h @@ -2013,6 +2013,7 @@ struct _virDomainMemoryDef { int model; /* virDomainMemoryModel */ int targetNode; unsigned long long size; /* kibibytes */ + unsigned long long labelsize; /* kibibytes; valid only for NVDIMM */ virDomainDeviceInfo info; }; diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c index 86a52ef62..89d47e50d 100644 --- a/src/qemu/qemu_command.c +++ b/src/qemu/qemu_command.c @@ -3420,6 +3420,9 @@ qemuBuildMemoryDeviceStr(virDomainMemoryDefPtr mem) if (mem->targetNode >= 0) virBufferAsprintf(&buf, "node=%d,", mem->targetNode); + if (mem->labelsize) + virBufferAsprintf(&buf, "label-size=%llu,", mem->labelsize * 1024); + virBufferAsprintf(&buf, "memdev=mem%s,id=%s", mem->info.alias, mem->info.alias); diff --git a/tests/qemuxml2argvdata/qemuxml2argv-memory-hotplug-nvdimm-label.args b/tests/qemuxml2argvdata/qemuxml2argv-memory-hotplug-nvdimm-label.args new file mode 100644 index 000000000..8669f2d84 --- /dev/null +++ b/tests/qemuxml2argvdata/qemuxml2argv-memory-hotplug-nvdimm-label.args @@ -0,0 +1,26 @@ +LC_ALL=C \ +PATH=/bin \ +HOME=/home/test \ +USER=test \ +LOGNAME=test \ +QEMU_AUDIO_DRV=none \ +/usr/bin/qemu \ +-name QEMUGuest1 \ +-S \ +-machine pc,accel=tcg,nvdimm=on \ +-m size=219136k,slots=16,maxmem=1099511627776k \ +-smp 2,sockets=2,cores=1,threads=1 \ +-numa node,nodeid=0,cpus=0-1,mem=214 \ +-object memory-backend-file,id=memnvdimm0,prealloc=yes,mem-path=/tmp/nvdimm,\ +share=no,size=536870912 \ +-device nvdimm,node=0,label-size=131072,memdev=memnvdimm0,id=nvdimm0,slot=0 \ +-uuid c7a5fdbd-edaf-9455-926a-d65c16db1809 \ +-nographic \ +-nodefaults \ +-monitor unix:/tmp/lib/domain--1-QEMUGuest1/monitor.sock,server,nowait \ +-no-acpi \ +-boot c \ +-usb \ +-drive file=/dev/HostVG/QEMUGuest1,format=raw,if=none,id=drive-ide0-0-0 \ +-device ide-drive,bus=ide.0,unit=0,drive=drive-ide0-0-0,id=ide0-0-0 \ +-device virtio-balloon-pci,id=balloon0,bus=pci.0,addr=0x3 diff --git a/tests/qemuxml2argvdata/qemuxml2argv-memory-hotplug-nvdimm-label.xml b/tests/qemuxml2argvdata/qemuxml2argv-memory-hotplug-nvdimm-label.xml new file mode 100644 index 000000000..425385251 --- /dev/null +++ b/tests/qemuxml2argvdata/qemuxml2argv-memory-hotplug-nvdimm-label.xml @@ -0,0 +1,59 @@ +<domain type='qemu'> + <name>QEMUGuest1</name> + <uuid>c7a5fdbd-edaf-9455-926a-d65c16db1809</uuid> + <maxMemory slots='16' unit='KiB'>1099511627776</maxMemory> + <memory unit='KiB'>1267710</memory> + <currentMemory unit='KiB'>1267710</currentMemory> + <vcpu placement='static' cpuset='0-1'>2</vcpu> + <os> + <type arch='i686' machine='pc'>hvm</type> + <boot dev='hd'/> + </os> + <idmap> + <uid start='0' target='1000' count='10'/> + <gid start='0' target='1000' count='10'/> + </idmap> + <cpu> + <topology sockets='2' cores='1' threads='1'/> + <numa> + <cell id='0' cpus='0-1' memory='219136' unit='KiB'/> + </numa> + </cpu> + <clock offset='utc'/> + <on_poweroff>destroy</on_poweroff> + <on_reboot>restart</on_reboot> + <on_crash>destroy</on_crash> + <devices> + <emulator>/usr/bin/qemu</emulator> + <disk type='block' device='disk'> + <source dev='/dev/HostVG/QEMUGuest1'/> + <target dev='hda' bus='ide'/> + <address type='drive' controller='0' bus='0' target='0' unit='0'/> + </disk> + <controller type='ide' index='0'> + <address type='pci' domain='0x0000' bus='0x00' slot='0x01' function='0x1'/> + </controller> + <controller type='usb' index='0'> + <address type='pci' domain='0x0000' bus='0x00' slot='0x01' function='0x2'/> + </controller> + <controller type='pci' index='0' model='pci-root'/> + <input type='mouse' bus='ps2'/> + <input type='keyboard' bus='ps2'/> + <memballoon model='virtio'> + <address type='pci' domain='0x0000' bus='0x00' slot='0x03' function='0x0'/> + </memballoon> + <memory model='nvdimm' access='private'> + <source> + <path>/tmp/nvdimm</path> + </source> + <target> + <size unit='KiB'>523264</size> + <node>0</node> + <label> + <size unit='KiB'>128</size> + </label> + </target> + <address type='dimm' slot='0'/> + </memory> + </devices> +</domain> diff --git a/tests/qemuxml2argvtest.c b/tests/qemuxml2argvtest.c index bf839fe93..127ebae49 100644 --- a/tests/qemuxml2argvtest.c +++ b/tests/qemuxml2argvtest.c @@ -2341,6 +2341,8 @@ mymain(void) QEMU_CAPS_NUMA, QEMU_CAPS_OBJECT_MEMORY_RAM, QEMU_CAPS_OBJECT_MEMORY_FILE); DO_TEST("memory-hotplug-nvdimm-access", QEMU_CAPS_MACHINE_OPT, QEMU_CAPS_DEVICE_NVDIMM, QEMU_CAPS_NUMA, QEMU_CAPS_OBJECT_MEMORY_RAM, QEMU_CAPS_OBJECT_MEMORY_FILE); + DO_TEST("memory-hotplug-nvdimm-label", QEMU_CAPS_MACHINE_OPT, QEMU_CAPS_DEVICE_NVDIMM, + QEMU_CAPS_NUMA, QEMU_CAPS_OBJECT_MEMORY_RAM, QEMU_CAPS_OBJECT_MEMORY_FILE); DO_TEST("machine-aeskeywrap-on-caps", QEMU_CAPS_MACHINE_OPT, QEMU_CAPS_AES_KEY_WRAP, diff --git a/tests/qemuxml2xmloutdata/qemuxml2xmlout-memory-hotplug-nvdimm-label.xml b/tests/qemuxml2xmloutdata/qemuxml2xmlout-memory-hotplug-nvdimm-label.xml new file mode 120000 index 000000000..e357ec582 --- /dev/null +++ b/tests/qemuxml2xmloutdata/qemuxml2xmlout-memory-hotplug-nvdimm-label.xml @@ -0,0 +1 @@ +/home/zippy/work/libvirt/libvirt.git/tests/qemuxml2argvdata/qemuxml2argv-memory-hotplug-nvdimm-label.xml \ No newline at end of file diff --git a/tests/qemuxml2xmltest.c b/tests/qemuxml2xmltest.c index ef49a79ca..bf62dbbb2 100644 --- a/tests/qemuxml2xmltest.c +++ b/tests/qemuxml2xmltest.c @@ -1080,6 +1080,7 @@ mymain(void) DO_TEST("memory-hotplug-dimm", NONE); DO_TEST("memory-hotplug-nvdimm", NONE); DO_TEST("memory-hotplug-nvdimm-access", NONE); + DO_TEST("memory-hotplug-nvdimm-label", NONE); DO_TEST("net-udp", NONE); DO_TEST("video-virtio-gpu-device", NONE); -- 2.11.0

On 03/09/2017 11:06 AM, Michal Privoznik wrote:
For NVDIMM devices it is optionally possible to specify the size of internal storage for namespaces. Namespaces are a feature that allows users to partition the NVDIMM for different uses.
Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- docs/formatdomain.html.in | 11 ++++ docs/schemas/domaincommon.rng | 7 +++ src/conf/domain_conf.c | 34 +++++++++++++ src/conf/domain_conf.h | 1 + src/qemu/qemu_command.c | 3 ++ .../qemuxml2argv-memory-hotplug-nvdimm-label.args | 26 ++++++++++ .../qemuxml2argv-memory-hotplug-nvdimm-label.xml | 59 ++++++++++++++++++++++ tests/qemuxml2argvtest.c | 2 + .../qemuxml2xmlout-memory-hotplug-nvdimm-label.xml | 1 + tests/qemuxml2xmltest.c | 1 + 10 files changed, 145 insertions(+) create mode 100644 tests/qemuxml2argvdata/qemuxml2argv-memory-hotplug-nvdimm-label.args create mode 100644 tests/qemuxml2argvdata/qemuxml2argv-memory-hotplug-nvdimm-label.xml create mode 120000 tests/qemuxml2xmloutdata/qemuxml2xmlout-memory-hotplug-nvdimm-label.xml
Not that it matters in the grand scheme of things, but @access was split across 2 patches, while @label-size is all in one. The tests/qemuxml2xmloutdata/qemuxml2xmlout-memory-hotplug-nvdimm-label.xml has the same problem as patch 8 as make check fails for me... Required a similar adjustment from qemuxml2xmloutdata directory: ln -sf ../qemuxml2argvdata/qemuxml2argv-memory-hotplug-nvdimm-label.xml qemuxml2xmlout-memory-hotplug-nvdimm-label.xml FWIW: In re-reading my v2 review - not quite sure what was tripping me up about label-size. Perhaps when I saw that comparison to 128 I started overthinking things and missed the "if (def->labelsize..."
diff --git a/docs/formatdomain.html.in b/docs/formatdomain.html.in index 4bc3d92f9..bb6df4542 100644 --- a/docs/formatdomain.html.in +++ b/docs/formatdomain.html.in @@ -7122,6 +7122,9 @@ qemu-kvm -net nic,model=? /dev/null <target> <size unit='KiB'>524288</size> <node>1</node> + <label> + <size unit='KiB'>128</size> + </label> </target> </memory> </devices> @@ -7203,6 +7206,14 @@ qemu-kvm -net nic,model=? /dev/null attach the memory to. The element shall be used only if the guest has NUMA nodes configured. </p> + <p> + For NVDIMM type devices one can optionally use + <code>label</code> and its subelement <code>size</code> + to configure the size of namespaces label storage + within the NVDIMM module. The <code>size</code> element + has usual meaning described + <a href="#elementsMemoryAllocation">here</a>. + </p>
Should there be a summary of what was written in: https://www.redhat.com/archives/libvir-list/2016-August/msg00650.html e.g. By default no Namespace Label area is reserved in the file. If the user specifies label-size then the memory at the end of the file is used as the Namespace Label area. For QEMU, there are two restrictions: 1. Minimum label size is 128K 2. Remaining size (total - labelsize) is aligned with 4k for nomral backend or hugepage-size for hugetblfs cannot be 0. (Or reworded sufficiently ;-)... Would be "nice" not lose that and something someone may trip across... ACK w/ the link fixed - bonus points for documentation adjustments. Better to do it now while it's still fresh in your mind rather than months down the road when someone asks and you're in the middle of something else! John [...]
diff --git a/tests/qemuxml2xmloutdata/qemuxml2xmlout-memory-hotplug-nvdimm-label.xml b/tests/qemuxml2xmloutdata/qemuxml2xmlout-memory-hotplug-nvdimm-label.xml new file mode 120000 index 000000000..e357ec582 --- /dev/null +++ b/tests/qemuxml2xmloutdata/qemuxml2xmlout-memory-hotplug-nvdimm-label.xml @@ -0,0 +1 @@ +/home/zippy/work/libvirt/libvirt.git/tests/qemuxml2argvdata/qemuxml2argv-memory-hotplug-nvdimm-label.xml
!!
\ No newline at end of file

When domain is being started up, we ought to relabel the host side of NVDIMM so qemu has access to it. Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/security/security_dac.c | 73 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 73 insertions(+) diff --git a/src/security/security_dac.c b/src/security/security_dac.c index 67219170c..27aa2bd6a 100644 --- a/src/security/security_dac.c +++ b/src/security/security_dac.c @@ -1386,6 +1386,30 @@ virSecurityDACRestoreInputLabel(virSecurityManagerPtr mgr, } +static int +virSecurityDACRestoreMemoryLabel(virSecurityManagerPtr mgr, + virDomainDefPtr def ATTRIBUTE_UNUSED, + virDomainMemoryDefPtr mem) +{ + virSecurityDACDataPtr priv = virSecurityManagerGetPrivateData(mgr); + int ret = -1; + + switch ((virDomainMemoryModel) mem->model) { + case VIR_DOMAIN_MEMORY_MODEL_NVDIMM: + ret = virSecurityDACRestoreFileLabel(priv, mem->nvdimmPath); + break; + + case VIR_DOMAIN_MEMORY_MODEL_DIMM: + case VIR_DOMAIN_MEMORY_MODEL_LAST: + case VIR_DOMAIN_MEMORY_MODEL_NONE: + ret = 0; + break; + } + + return ret; +} + + static int virSecurityDACRestoreAllLabel(virSecurityManagerPtr mgr, virDomainDefPtr def, @@ -1425,6 +1449,13 @@ virSecurityDACRestoreAllLabel(virSecurityManagerPtr mgr, rc = -1; } + for (i = 0; i < def->nmems; i++) { + if (virSecurityDACRestoreMemoryLabel(mgr, + def, + def->mems[i]) < 0) + rc = -1; + } + if (virDomainChrDefForeach(def, false, virSecurityDACRestoreChardevCallback, @@ -1457,6 +1488,41 @@ virSecurityDACSetChardevCallback(virDomainDefPtr def, } +static int +virSecurityDACSetMemoryLabel(virSecurityManagerPtr mgr, + virDomainDefPtr def, + virDomainMemoryDefPtr mem) + +{ + virSecurityDACDataPtr priv = virSecurityManagerGetPrivateData(mgr); + virSecurityLabelDefPtr seclabel; + int ret = -1; + uid_t user; + gid_t group; + + switch ((virDomainMemoryModel) mem->model) { + case VIR_DOMAIN_MEMORY_MODEL_NVDIMM: + seclabel = virDomainDefGetSecurityLabelDef(def, SECURITY_DAC_NAME); + if (seclabel && !seclabel->relabel) + return 0; + + if (virSecurityDACGetIds(seclabel, priv, &user, &group, NULL, NULL) < 0) + return -1; + + ret = virSecurityDACSetOwnership(priv, NULL, mem->nvdimmPath, user, group); + break; + + case VIR_DOMAIN_MEMORY_MODEL_DIMM: + case VIR_DOMAIN_MEMORY_MODEL_LAST: + case VIR_DOMAIN_MEMORY_MODEL_NONE: + ret = 0; + break; + } + + return ret; +} + + static int virSecurityDACSetAllLabel(virSecurityManagerPtr mgr, virDomainDefPtr def, @@ -1496,6 +1562,13 @@ virSecurityDACSetAllLabel(virSecurityManagerPtr mgr, return -1; } + for (i = 0; i < def->nmems; i++) { + if (virSecurityDACSetMemoryLabel(mgr, + def, + def->mems[i]) < 0) + return -1; + } + if (virDomainChrDefForeach(def, true, virSecurityDACSetChardevCallback, -- 2.11.0

On 03/09/2017 11:06 AM, Michal Privoznik wrote:
When domain is being started up, we ought to relabel the host side of NVDIMM so qemu has access to it.
Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/security/security_dac.c | 73 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 73 insertions(+)
ACK John

When domain is being started up, we ought to relabel the host side of NVDIMM so qemu has access to it. Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/security/security_selinux.c | 66 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 66 insertions(+) diff --git a/src/security/security_selinux.c b/src/security/security_selinux.c index e22de0653..1be2acd27 100644 --- a/src/security/security_selinux.c +++ b/src/security/security_selinux.c @@ -1381,6 +1381,62 @@ virSecuritySELinuxRestoreInputLabel(virSecurityManagerPtr mgr, } +static int +virSecuritySELinuxSetMemoryLabel(virSecurityManagerPtr mgr, + virDomainDefPtr def, + virDomainMemoryDefPtr mem) +{ + virSecurityLabelDefPtr seclabel; + + seclabel = virDomainDefGetSecurityLabelDef(def, SECURITY_SELINUX_NAME); + if (seclabel == NULL) + return 0; + + switch ((virDomainMemoryModel) mem->model) { + case VIR_DOMAIN_MEMORY_MODEL_NVDIMM: + if (virSecuritySELinuxSetFilecon(mgr, mem->nvdimmPath, + seclabel->imagelabel) < 0) + return -1; + break; + + case VIR_DOMAIN_MEMORY_MODEL_NONE: + case VIR_DOMAIN_MEMORY_MODEL_DIMM: + case VIR_DOMAIN_MEMORY_MODEL_LAST: + break; + } + + return 0; +} + + +static int +virSecuritySELinuxRestoreMemoryLabel(virSecurityManagerPtr mgr, + virDomainDefPtr def, + virDomainMemoryDefPtr mem) +{ + int ret = -1; + virSecurityLabelDefPtr seclabel; + + switch ((virDomainMemoryModel) mem->model) { + case VIR_DOMAIN_MEMORY_MODEL_NVDIMM: + seclabel = virDomainDefGetSecurityLabelDef(def, SECURITY_SELINUX_NAME); + if (seclabel == NULL) + return 0; + + ret = virSecuritySELinuxRestoreFileLabel(mgr, mem->nvdimmPath); + break; + + case VIR_DOMAIN_MEMORY_MODEL_DIMM: + case VIR_DOMAIN_MEMORY_MODEL_NONE: + case VIR_DOMAIN_MEMORY_MODEL_LAST: + ret = 0; + break; + } + + return ret; +} + + static int virSecuritySELinuxSetTPMFileLabel(virSecurityManagerPtr mgr, virDomainDefPtr def, @@ -2325,6 +2381,11 @@ virSecuritySELinuxRestoreAllLabel(virSecurityManagerPtr mgr, rc = -1; } + for (i = 0; i < def->nmems; i++) { + if (virSecuritySELinuxRestoreMemoryLabel(mgr, def, def->mems[i]) < 0) + return -1; + } + for (i = 0; i < def->ndisks; i++) { virDomainDiskDefPtr disk = def->disks[i]; @@ -2711,6 +2772,11 @@ virSecuritySELinuxSetAllLabel(virSecurityManagerPtr mgr, return -1; } + for (i = 0; i < def->nmems; i++) { + if (virSecuritySELinuxSetMemoryLabel(mgr, def, def->mems[i]) < 0) + return -1; + } + if (def->tpm) { if (virSecuritySELinuxSetTPMFileLabel(mgr, def, def->tpm) < 0) return -1; -- 2.11.0

On 03/09/2017 11:06 AM, Michal Privoznik wrote:
When domain is being started up, we ought to relabel the host side of NVDIMM so qemu has access to it.
Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/security/security_selinux.c | 66 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 66 insertions(+)
diff --git a/src/security/security_selinux.c b/src/security/security_selinux.c index e22de0653..1be2acd27 100644 --- a/src/security/security_selinux.c +++ b/src/security/security_selinux.c @@ -1381,6 +1381,62 @@ virSecuritySELinuxRestoreInputLabel(virSecurityManagerPtr mgr, }
+static int +virSecuritySELinuxSetMemoryLabel(virSecurityManagerPtr mgr, + virDomainDefPtr def, + virDomainMemoryDefPtr mem) +{ + virSecurityLabelDefPtr seclabel; + + seclabel = virDomainDefGetSecurityLabelDef(def, SECURITY_SELINUX_NAME); + if (seclabel == NULL) + return 0;
Since it doesn't matter for DIMM, should this go in the NVDIMM label? Although I do see this follow a couple of the other Set*Label functions when there's only one of the switch case statements that uses the seclabel. I guess for consistency it can stay as is, although I wouldn't object to altering code for those single switch/case conditions Also I note that the security_dac code looks at the "->relabel" when making a decision, but that's not done here.
+ + switch ((virDomainMemoryModel) mem->model) { + case VIR_DOMAIN_MEMORY_MODEL_NVDIMM: + if (virSecuritySELinuxSetFilecon(mgr, mem->nvdimmPath, + seclabel->imagelabel) < 0) + return -1; + break; + + case VIR_DOMAIN_MEMORY_MODEL_NONE: + case VIR_DOMAIN_MEMORY_MODEL_DIMM: + case VIR_DOMAIN_MEMORY_MODEL_LAST: + break; + } + + return 0; +} + + +static int +virSecuritySELinuxRestoreMemoryLabel(virSecurityManagerPtr mgr, + virDomainDefPtr def, + virDomainMemoryDefPtr mem) +{ + int ret = -1; + virSecurityLabelDefPtr seclabel; + + switch ((virDomainMemoryModel) mem->model) { + case VIR_DOMAIN_MEMORY_MODEL_NVDIMM: + seclabel = virDomainDefGetSecurityLabelDef(def, SECURITY_SELINUX_NAME); + if (seclabel == NULL) + return 0;
Ironically you did change this one to be different... Similar comment regarding the relabel Conditional ACK of course depending on the relabel thing - you could explain or just provide something that you'll squash in.... John
+ + ret = virSecuritySELinuxRestoreFileLabel(mgr, mem->nvdimmPath); + break; + + case VIR_DOMAIN_MEMORY_MODEL_DIMM: + case VIR_DOMAIN_MEMORY_MODEL_NONE: + case VIR_DOMAIN_MEMORY_MODEL_LAST: + ret = 0; + break; + } + + return ret; +} + + static int virSecuritySELinuxSetTPMFileLabel(virSecurityManagerPtr mgr, virDomainDefPtr def, @@ -2325,6 +2381,11 @@ virSecuritySELinuxRestoreAllLabel(virSecurityManagerPtr mgr, rc = -1; }
+ for (i = 0; i < def->nmems; i++) { + if (virSecuritySELinuxRestoreMemoryLabel(mgr, def, def->mems[i]) < 0) + return -1; + } + for (i = 0; i < def->ndisks; i++) { virDomainDiskDefPtr disk = def->disks[i];
@@ -2711,6 +2772,11 @@ virSecuritySELinuxSetAllLabel(virSecurityManagerPtr mgr, return -1; }
+ for (i = 0; i < def->nmems; i++) { + if (virSecuritySELinuxSetMemoryLabel(mgr, def, def->mems[i]) < 0) + return -1; + } + if (def->tpm) { if (virSecuritySELinuxSetTPMFileLabel(mgr, def, def->tpm) < 0) return -1;

These APIs will be used whenever we are hot (un-)plugging a memdev. Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/libvirt_private.syms | 2 ++ src/security/security_driver.h | 9 +++++++ src/security/security_manager.c | 56 +++++++++++++++++++++++++++++++++++++++++ src/security/security_manager.h | 7 ++++++ src/security/security_stack.c | 38 ++++++++++++++++++++++++++++ 5 files changed, 112 insertions(+) diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms index 6c89d44e2..681414266 100644 --- a/src/libvirt_private.syms +++ b/src/libvirt_private.syms @@ -1180,6 +1180,7 @@ virSecurityManagerRestoreAllLabel; virSecurityManagerRestoreDiskLabel; virSecurityManagerRestoreHostdevLabel; virSecurityManagerRestoreImageLabel; +virSecurityManagerRestoreMemoryLabel; virSecurityManagerRestoreSavedStateLabel; virSecurityManagerSetAllLabel; virSecurityManagerSetChildProcessLabel; @@ -1188,6 +1189,7 @@ virSecurityManagerSetDiskLabel; virSecurityManagerSetHostdevLabel; virSecurityManagerSetImageFDLabel; virSecurityManagerSetImageLabel; +virSecurityManagerSetMemoryLabel; virSecurityManagerSetProcessLabel; virSecurityManagerSetSavedStateLabel; virSecurityManagerSetSocketLabel; diff --git a/src/security/security_driver.h b/src/security/security_driver.h index fa65eb359..0f5cce5f8 100644 --- a/src/security/security_driver.h +++ b/src/security/security_driver.h @@ -123,6 +123,12 @@ typedef int (*virSecurityDomainSetImageLabel) (virSecurityManagerPtr mgr, typedef int (*virSecurityDomainRestoreImageLabel) (virSecurityManagerPtr mgr, virDomainDefPtr def, virStorageSourcePtr src); +typedef int (*virSecurityDomainSetMemoryLabel) (virSecurityManagerPtr mgr, + virDomainDefPtr def, + virDomainMemoryDefPtr mem); +typedef int (*virSecurityDomainRestoreMemoryLabel) (virSecurityManagerPtr mgr, + virDomainDefPtr def, + virDomainMemoryDefPtr mem); typedef int (*virSecurityDomainSetPathLabel) (virSecurityManagerPtr mgr, virDomainDefPtr def, const char *path); @@ -152,6 +158,9 @@ struct _virSecurityDriver { virSecurityDomainSetImageLabel domainSetSecurityImageLabel; virSecurityDomainRestoreImageLabel domainRestoreSecurityImageLabel; + virSecurityDomainSetMemoryLabel domainSetSecurityMemoryLabel; + virSecurityDomainRestoreMemoryLabel domainRestoreSecurityMemoryLabel; + virSecurityDomainSetDaemonSocketLabel domainSetSecurityDaemonSocketLabel; virSecurityDomainSetSocketLabel domainSetSecuritySocketLabel; virSecurityDomainClearSocketLabel domainClearSecuritySocketLabel; diff --git a/src/security/security_manager.c b/src/security/security_manager.c index d8c6facc8..6c777db1e 100644 --- a/src/security/security_manager.c +++ b/src/security/security_manager.c @@ -1052,3 +1052,59 @@ virSecurityManagerDomainSetPathLabel(virSecurityManagerPtr mgr, return 0; } + + +/** + * virSecurityManagerSetMemoryLabel: + * @mgr: security manager object + * @vm: domain definition object + * @mem: memory module to operate on + * + * Labels the host part of a memory module. + * + * Returns: 0 on success, -1 on error. + */ +int +virSecurityManagerSetMemoryLabel(virSecurityManagerPtr mgr, + virDomainDefPtr vm, + virDomainMemoryDefPtr mem) +{ + if (mgr->drv->domainSetSecurityMemoryLabel) { + int ret; + virObjectLock(mgr); + ret = mgr->drv->domainSetSecurityMemoryLabel(mgr, vm, mem); + virObjectUnlock(mgr); + return ret; + } + + virReportUnsupportedError(); + return -1; +} + + +/** + * virSecurityManagerRestoreMemoryLabel: + * @mgr: security manager object + * @vm: domain definition object + * @mem: memory module to operate on + * + * Removes security label from the host part of a memory module. + * + * Returns: 0 on success, -1 on error. + */ +int +virSecurityManagerRestoreMemoryLabel(virSecurityManagerPtr mgr, + virDomainDefPtr vm, + virDomainMemoryDefPtr mem) +{ + if (mgr->drv->domainRestoreSecurityMemoryLabel) { + int ret; + virObjectLock(mgr); + ret = mgr->drv->domainRestoreSecurityMemoryLabel(mgr, vm, mem); + virObjectUnlock(mgr); + return ret; + } + + virReportUnsupportedError(); + return -1; +} diff --git a/src/security/security_manager.h b/src/security/security_manager.h index bae8493ee..238e66cd0 100644 --- a/src/security/security_manager.h +++ b/src/security/security_manager.h @@ -162,6 +162,13 @@ int virSecurityManagerRestoreImageLabel(virSecurityManagerPtr mgr, virDomainDefPtr vm, virStorageSourcePtr src); +int virSecurityManagerSetMemoryLabel(virSecurityManagerPtr mgr, + virDomainDefPtr vm, + virDomainMemoryDefPtr mem); +int virSecurityManagerRestoreMemoryLabel(virSecurityManagerPtr mgr, + virDomainDefPtr vm, + virDomainMemoryDefPtr mem); + int virSecurityManagerDomainSetPathLabel(virSecurityManagerPtr mgr, virDomainDefPtr vm, const char *path); diff --git a/src/security/security_stack.c b/src/security/security_stack.c index 6056ae321..b02ee18a8 100644 --- a/src/security/security_stack.c +++ b/src/security/security_stack.c @@ -627,6 +627,41 @@ virSecurityStackRestoreImageLabel(virSecurityManagerPtr mgr, return rc; } +static int +virSecurityStackSetMemoryLabel(virSecurityManagerPtr mgr, + virDomainDefPtr vm, + virDomainMemoryDefPtr mem) +{ + virSecurityStackDataPtr priv = virSecurityManagerGetPrivateData(mgr); + virSecurityStackItemPtr item = priv->itemsHead; + int rc = 0; + + for (; item; item = item->next) { + if (virSecurityManagerSetMemoryLabel(item->securityManager, vm, mem) < 0) + rc = -1; + } + + return rc; +} + +static int +virSecurityStackRestoreMemoryLabel(virSecurityManagerPtr mgr, + virDomainDefPtr vm, + virDomainMemoryDefPtr mem) +{ + virSecurityStackDataPtr priv = virSecurityManagerGetPrivateData(mgr); + virSecurityStackItemPtr item = priv->itemsHead; + int rc = 0; + + for (; item; item = item->next) { + if (virSecurityManagerRestoreMemoryLabel(item->securityManager, + vm, mem) < 0) + rc = -1; + } + + return rc; +} + static int virSecurityStackDomainSetPathLabel(virSecurityManagerPtr mgr, virDomainDefPtr vm, @@ -669,6 +704,9 @@ virSecurityDriver virSecurityDriverStack = { .domainSetSecurityImageLabel = virSecurityStackSetImageLabel, .domainRestoreSecurityImageLabel = virSecurityStackRestoreImageLabel, + .domainSetSecurityMemoryLabel = virSecurityStackSetMemoryLabel, + .domainRestoreSecurityMemoryLabel = virSecurityStackRestoreMemoryLabel, + .domainSetSecurityDaemonSocketLabel = virSecurityStackSetDaemonSocketLabel, .domainSetSecuritySocketLabel = virSecurityStackSetSocketLabel, .domainClearSecuritySocketLabel = virSecurityStackClearSocketLabel, -- 2.11.0

On 03/09/2017 11:06 AM, Michal Privoznik wrote:
These APIs will be used whenever we are hot (un-)plugging a memdev.
Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/libvirt_private.syms | 2 ++ src/security/security_driver.h | 9 +++++++ src/security/security_manager.c | 56 +++++++++++++++++++++++++++++++++++++++++ src/security/security_manager.h | 7 ++++++ src/security/security_stack.c | 38 ++++++++++++++++++++++++++++ 5 files changed, 112 insertions(+)
ACK John

Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/security/security_dac.c | 3 +++ src/security/security_nop.c | 19 +++++++++++++++++++ src/security/security_selinux.c | 3 +++ 3 files changed, 25 insertions(+) diff --git a/src/security/security_dac.c b/src/security/security_dac.c index 27aa2bd6a..d6a2daf74 100644 --- a/src/security/security_dac.c +++ b/src/security/security_dac.c @@ -1975,6 +1975,9 @@ virSecurityDriver virSecurityDriverDAC = { .domainSetSecurityImageLabel = virSecurityDACSetImageLabel, .domainRestoreSecurityImageLabel = virSecurityDACRestoreImageLabel, + .domainSetSecurityMemoryLabel = virSecurityDACSetMemoryLabel, + .domainRestoreSecurityMemoryLabel = virSecurityDACRestoreMemoryLabel, + .domainSetSecurityDaemonSocketLabel = virSecurityDACSetDaemonSocketLabel, .domainSetSecuritySocketLabel = virSecurityDACSetSocketLabel, .domainClearSecuritySocketLabel = virSecurityDACClearSocketLabel, diff --git a/src/security/security_nop.c b/src/security/security_nop.c index 951125dce..0a9b51528 100644 --- a/src/security/security_nop.c +++ b/src/security/security_nop.c @@ -236,6 +236,22 @@ virSecurityDomainSetImageLabelNop(virSecurityManagerPtr mgr ATTRIBUTE_UNUSED, return 0; } +static int +virSecurityDomainSetMemoryLabelNop(virSecurityManagerPtr mgr ATTRIBUTE_UNUSED, + virDomainDefPtr def ATTRIBUTE_UNUSED, + virDomainMemoryDefPtr mem ATTRIBUTE_UNUSED) +{ + return 0; +} + +static int +virSecurityDomainRestoreMemoryLabelNop(virSecurityManagerPtr mgr ATTRIBUTE_UNUSED, + virDomainDefPtr def ATTRIBUTE_UNUSED, + virDomainMemoryDefPtr mem ATTRIBUTE_UNUSED) +{ + return 0; +} + virSecurityDriver virSecurityDriverNop = { .privateDataLen = 0, @@ -255,6 +271,9 @@ virSecurityDriver virSecurityDriverNop = { .domainSetSecurityImageLabel = virSecurityDomainSetImageLabelNop, .domainRestoreSecurityImageLabel = virSecurityDomainRestoreImageLabelNop, + .domainSetSecurityMemoryLabel = virSecurityDomainSetMemoryLabelNop, + .domainRestoreSecurityMemoryLabel = virSecurityDomainRestoreMemoryLabelNop, + .domainSetSecurityDaemonSocketLabel = virSecurityDomainSetDaemonSocketLabelNop, .domainSetSecuritySocketLabel = virSecurityDomainSetSocketLabelNop, .domainClearSecuritySocketLabel = virSecurityDomainClearSocketLabelNop, diff --git a/src/security/security_selinux.c b/src/security/security_selinux.c index 1be2acd27..e20382a02 100644 --- a/src/security/security_selinux.c +++ b/src/security/security_selinux.c @@ -3007,6 +3007,9 @@ virSecurityDriver virSecurityDriverSELinux = { .domainSetSecurityImageLabel = virSecuritySELinuxSetImageLabel, .domainRestoreSecurityImageLabel = virSecuritySELinuxRestoreImageLabel, + .domainSetSecurityMemoryLabel = virSecuritySELinuxSetMemoryLabel, + .domainRestoreSecurityMemoryLabel = virSecuritySELinuxRestoreMemoryLabel, + .domainSetSecurityDaemonSocketLabel = virSecuritySELinuxSetDaemonSocketLabel, .domainSetSecuritySocketLabel = virSecuritySELinuxSetSocketLabel, .domainClearSecuritySocketLabel = virSecuritySELinuxClearSocketLabel, -- 2.11.0

On 03/09/2017 11:06 AM, Michal Privoznik wrote:
Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/security/security_dac.c | 3 +++ src/security/security_nop.c | 19 +++++++++++++++++++ src/security/security_selinux.c | 3 +++ 3 files changed, 25 insertions(+)
ACK, John

Now that we have APIs for relabel memdevs on hotplug, fill in the missing implementation in qemu hotplug code. The qemuSecurity wrappers might look like overkill for now, because qemu namespace code does not deal with the nvdimms yet. Nor does our cgroup code. But hey, there's cgroup_device_acl variable in qemu.conf. If users add their /dev/pmem* device in there, the device is allowed in cgroups and created in the namespace so they can successfully passthrough it to the domain. It doesn't look like overkill after all, does it? Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/qemu/qemu_hotplug.c | 13 +++++++++++ src/qemu/qemu_security.c | 56 ++++++++++++++++++++++++++++++++++++++++++++++++ src/qemu/qemu_security.h | 8 +++++++ 3 files changed, 77 insertions(+) diff --git a/src/qemu/qemu_hotplug.c b/src/qemu/qemu_hotplug.c index 4e416b12e..7e19d2f82 100644 --- a/src/qemu/qemu_hotplug.c +++ b/src/qemu/qemu_hotplug.c @@ -2215,6 +2215,7 @@ qemuDomainAttachMemory(virQEMUDriverPtr driver, char *objalias = NULL; const char *backendType; bool objAdded = false; + bool teardownlabel = false; virJSONValuePtr props = NULL; virObjectEventPtr event; int id; @@ -2244,6 +2245,10 @@ qemuDomainAttachMemory(virQEMUDriverPtr driver, priv->qemuCaps, vm->def, mem, NULL, true) < 0) goto cleanup; + if (qemuSecuritySetMemoryLabel(driver, vm, mem) < 0) + goto cleanup; + teardownlabel = true; + if (virDomainMemoryInsert(vm->def, mem) < 0) { virJSONValueFree(props); goto cleanup; @@ -2288,6 +2293,11 @@ qemuDomainAttachMemory(virQEMUDriverPtr driver, audit: virDomainAuditMemory(vm, oldmem, newmem, "update", ret == 0); cleanup: + if (mem && ret < 0) { + if (teardownlabel && qemuSecurityRestoreMemoryLabel(driver, vm, mem) < 0) + VIR_WARN("Unable to restore security label on memdev"); + } + virObjectUnref(cfg); VIR_FREE(devstr); VIR_FREE(objalias); @@ -3748,6 +3758,9 @@ qemuDomainRemoveMemoryDevice(virQEMUDriverPtr driver, if ((idx = virDomainMemoryFindByDef(vm->def, mem)) >= 0) virDomainMemoryRemove(vm->def, idx); + if (qemuSecurityRestoreMemoryLabel(driver, vm, mem) < 0) + VIR_WARN("Unable to restore security label on memdev"); + virDomainMemoryDefFree(mem); /* fix the balloon size */ diff --git a/src/qemu/qemu_security.c b/src/qemu/qemu_security.c index f2931976b..61934f990 100644 --- a/src/qemu/qemu_security.c +++ b/src/qemu/qemu_security.c @@ -245,3 +245,59 @@ qemuSecurityRestoreHostdevLabel(virQEMUDriverPtr driver, virSecurityManagerTransactionAbort(driver->securityManager); return ret; } + + +int +qemuSecuritySetMemoryLabel(virQEMUDriverPtr driver, + virDomainObjPtr vm, + virDomainMemoryDefPtr mem) +{ + int ret = -1; + + if (qemuDomainNamespaceEnabled(vm, QEMU_DOMAIN_NS_MOUNT) && + virSecurityManagerTransactionStart(driver->securityManager) < 0) + goto cleanup; + + if (virSecurityManagerSetMemoryLabel(driver->securityManager, + vm->def, + mem) < 0) + goto cleanup; + + if (qemuDomainNamespaceEnabled(vm, QEMU_DOMAIN_NS_MOUNT) && + virSecurityManagerTransactionCommit(driver->securityManager, + vm->pid) < 0) + goto cleanup; + + ret = 0; + cleanup: + virSecurityManagerTransactionAbort(driver->securityManager); + return ret; +} + + +int +qemuSecurityRestoreMemoryLabel(virQEMUDriverPtr driver, + virDomainObjPtr vm, + virDomainMemoryDefPtr mem) +{ + int ret = -1; + + if (qemuDomainNamespaceEnabled(vm, QEMU_DOMAIN_NS_MOUNT) && + virSecurityManagerTransactionStart(driver->securityManager) < 0) + goto cleanup; + + if (virSecurityManagerRestoreMemoryLabel(driver->securityManager, + vm->def, + mem) < 0) + goto cleanup; + + if (qemuDomainNamespaceEnabled(vm, QEMU_DOMAIN_NS_MOUNT) && + virSecurityManagerTransactionCommit(driver->securityManager, + vm->pid) < 0) + goto cleanup; + + ret = 0; + cleanup: + virSecurityManagerTransactionAbort(driver->securityManager); + return ret; +} diff --git a/src/qemu/qemu_security.h b/src/qemu/qemu_security.h index d86db3f6b..7b25855bf 100644 --- a/src/qemu/qemu_security.h +++ b/src/qemu/qemu_security.h @@ -62,6 +62,14 @@ int qemuSecurityRestoreHostdevLabel(virQEMUDriverPtr driver, virDomainObjPtr vm, virDomainHostdevDefPtr hostdev); +int qemuSecuritySetMemoryLabel(virQEMUDriverPtr driver, + virDomainObjPtr vm, + virDomainMemoryDefPtr mem); + +int qemuSecurityRestoreMemoryLabel(virQEMUDriverPtr driver, + virDomainObjPtr vm, + virDomainMemoryDefPtr mem); + /* Please note that for these APIs there is no wrapper yet. Do NOT blindly add * new APIs here. If an API can touch a /dev file add a proper wrapper instead. */ -- 2.11.0

On 03/09/2017 11:06 AM, Michal Privoznik wrote:
Now that we have APIs for relabel memdevs on hotplug, fill in the missing implementation in qemu hotplug code.
The qemuSecurity wrappers might look like overkill for now, because qemu namespace code does not deal with the nvdimms yet. Nor does our cgroup code. But hey, there's cgroup_device_acl variable in qemu.conf. If users add their /dev/pmem* device in there, the device is allowed in cgroups and created in the namespace so they can successfully passthrough it to the domain. It doesn't look like overkill after all, does it?
Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/qemu/qemu_hotplug.c | 13 +++++++++++ src/qemu/qemu_security.c | 56 ++++++++++++++++++++++++++++++++++++++++++++++++ src/qemu/qemu_security.h | 8 +++++++ 3 files changed, 77 insertions(+)
diff --git a/src/qemu/qemu_hotplug.c b/src/qemu/qemu_hotplug.c index 4e416b12e..7e19d2f82 100644 --- a/src/qemu/qemu_hotplug.c +++ b/src/qemu/qemu_hotplug.c @@ -2215,6 +2215,7 @@ qemuDomainAttachMemory(virQEMUDriverPtr driver, char *objalias = NULL; const char *backendType; bool objAdded = false; + bool teardownlabel = false; virJSONValuePtr props = NULL; virObjectEventPtr event; int id; @@ -2244,6 +2245,10 @@ qemuDomainAttachMemory(virQEMUDriverPtr driver, priv->qemuCaps, vm->def, mem, NULL, true) < 0) goto cleanup;
+ if (qemuSecuritySetMemoryLabel(driver, vm, mem) < 0) + goto cleanup; + teardownlabel = true; +
@props will be leaked (found by Coverity) If the code would follow other models, then virJSONValueFree(props) would be in the cleanup label. Since props = NULL is done after the AddObject that should be safe. Of course that means a couple of existing virJSONValueFree(props) would be removed and just goto cleanup left. Whether those are done inline here or as a patch stuffed in before this one - doesn't matter to me...
if (virDomainMemoryInsert(vm->def, mem) < 0) { virJSONValueFree(props); goto cleanup; @@ -2288,6 +2293,11 @@ qemuDomainAttachMemory(virQEMUDriverPtr driver, audit: virDomainAuditMemory(vm, oldmem, newmem, "update", ret == 0); cleanup: + if (mem && ret < 0) {
Given this requirement for 'mem' to still be valid one wonders if rather than goto cleanup, maybe the goto should be some label below removedef and above goto audit... Furthermore whether that should also be wrapped in a virSaveLastError... restoredevice: if (!mem) { VIR_WARN("Unable to restore host device la goto audit; } orig_err = virSaveLastError(); if (teardownlabel && qemuSecurityRestoreMemoryLabel(driver, vm, mem) < 0) VIR_WARN("Unable to restore security label on memdev"); virSetError(orig_err); virFreeError(orig_err); I think cgroup and namespace could use the same label - whether or not you have a VIR_WARN for when if (!mem) is true is your call. Using the label leaves out the ugliness of (mem && ret < 0) from the cleanup path. ACK w/ adjustments... John
+ if (teardownlabel && qemuSecurityRestoreMemoryLabel(driver, vm, mem) < 0) + VIR_WARN("Unable to restore security label on memdev"); + } + virObjectUnref(cfg); VIR_FREE(devstr); VIR_FREE(objalias); @@ -3748,6 +3758,9 @@ qemuDomainRemoveMemoryDevice(virQEMUDriverPtr driver, if ((idx = virDomainMemoryFindByDef(vm->def, mem)) >= 0) virDomainMemoryRemove(vm->def, idx);
+ if (qemuSecurityRestoreMemoryLabel(driver, vm, mem) < 0) + VIR_WARN("Unable to restore security label on memdev"); + virDomainMemoryDefFree(mem);
/* fix the balloon size */ diff --git a/src/qemu/qemu_security.c b/src/qemu/qemu_security.c index f2931976b..61934f990 100644 --- a/src/qemu/qemu_security.c +++ b/src/qemu/qemu_security.c @@ -245,3 +245,59 @@ qemuSecurityRestoreHostdevLabel(virQEMUDriverPtr driver, virSecurityManagerTransactionAbort(driver->securityManager); return ret; } + + +int +qemuSecuritySetMemoryLabel(virQEMUDriverPtr driver, + virDomainObjPtr vm, + virDomainMemoryDefPtr mem) +{ + int ret = -1; + + if (qemuDomainNamespaceEnabled(vm, QEMU_DOMAIN_NS_MOUNT) && + virSecurityManagerTransactionStart(driver->securityManager) < 0) + goto cleanup; + + if (virSecurityManagerSetMemoryLabel(driver->securityManager, + vm->def, + mem) < 0) + goto cleanup; + + if (qemuDomainNamespaceEnabled(vm, QEMU_DOMAIN_NS_MOUNT) && + virSecurityManagerTransactionCommit(driver->securityManager, + vm->pid) < 0) + goto cleanup; + + ret = 0; + cleanup: + virSecurityManagerTransactionAbort(driver->securityManager); + return ret; +} + + +int +qemuSecurityRestoreMemoryLabel(virQEMUDriverPtr driver, + virDomainObjPtr vm, + virDomainMemoryDefPtr mem) +{ + int ret = -1; + + if (qemuDomainNamespaceEnabled(vm, QEMU_DOMAIN_NS_MOUNT) && + virSecurityManagerTransactionStart(driver->securityManager) < 0) + goto cleanup; + + if (virSecurityManagerRestoreMemoryLabel(driver->securityManager, + vm->def, + mem) < 0) + goto cleanup; + + if (qemuDomainNamespaceEnabled(vm, QEMU_DOMAIN_NS_MOUNT) && + virSecurityManagerTransactionCommit(driver->securityManager, + vm->pid) < 0) + goto cleanup; + + ret = 0; + cleanup: + virSecurityManagerTransactionAbort(driver->securityManager); + return ret; +} diff --git a/src/qemu/qemu_security.h b/src/qemu/qemu_security.h index d86db3f6b..7b25855bf 100644 --- a/src/qemu/qemu_security.h +++ b/src/qemu/qemu_security.h @@ -62,6 +62,14 @@ int qemuSecurityRestoreHostdevLabel(virQEMUDriverPtr driver, virDomainObjPtr vm, virDomainHostdevDefPtr hostdev);
+int qemuSecuritySetMemoryLabel(virQEMUDriverPtr driver, + virDomainObjPtr vm, + virDomainMemoryDefPtr mem); + +int qemuSecurityRestoreMemoryLabel(virQEMUDriverPtr driver, + virDomainObjPtr vm, + virDomainMemoryDefPtr mem); + /* Please note that for these APIs there is no wrapper yet. Do NOT blindly add * new APIs here. If an API can touch a /dev file add a proper wrapper instead. */

On 03/14/2017 03:58 PM, John Ferlan wrote:
On 03/09/2017 11:06 AM, Michal Privoznik wrote:
Now that we have APIs for relabel memdevs on hotplug, fill in the missing implementation in qemu hotplug code.
The qemuSecurity wrappers might look like overkill for now, because qemu namespace code does not deal with the nvdimms yet. Nor does our cgroup code. But hey, there's cgroup_device_acl variable in qemu.conf. If users add their /dev/pmem* device in there, the device is allowed in cgroups and created in the namespace so they can successfully passthrough it to the domain. It doesn't look like overkill after all, does it?
Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/qemu/qemu_hotplug.c | 13 +++++++++++ src/qemu/qemu_security.c | 56 ++++++++++++++++++++++++++++++++++++++++++++++++ src/qemu/qemu_security.h | 8 +++++++ 3 files changed, 77 insertions(+)
diff --git a/src/qemu/qemu_hotplug.c b/src/qemu/qemu_hotplug.c index 4e416b12e..7e19d2f82 100644 --- a/src/qemu/qemu_hotplug.c +++ b/src/qemu/qemu_hotplug.c @@ -2215,6 +2215,7 @@ qemuDomainAttachMemory(virQEMUDriverPtr driver, char *objalias = NULL; const char *backendType; bool objAdded = false; + bool teardownlabel = false; virJSONValuePtr props = NULL; virObjectEventPtr event; int id; @@ -2244,6 +2245,10 @@ qemuDomainAttachMemory(virQEMUDriverPtr driver, priv->qemuCaps, vm->def, mem, NULL, true) < 0) goto cleanup;
+ if (qemuSecuritySetMemoryLabel(driver, vm, mem) < 0) + goto cleanup; + teardownlabel = true; +
@props will be leaked (found by Coverity)
If the code would follow other models, then virJSONValueFree(props) would be in the cleanup label. Since props = NULL is done after the AddObject that should be safe. Of course that means a couple of existing virJSONValueFree(props) would be removed and just goto cleanup left. Whether those are done inline here or as a patch stuffed in before this one - doesn't matter to me...
Ah, sure. I'll remove all those virJSONValueFree() and just have one under cleanup.
if (virDomainMemoryInsert(vm->def, mem) < 0) { virJSONValueFree(props); goto cleanup; @@ -2288,6 +2293,11 @@ qemuDomainAttachMemory(virQEMUDriverPtr driver, audit: virDomainAuditMemory(vm, oldmem, newmem, "update", ret == 0); cleanup: + if (mem && ret < 0) {
Given this requirement for 'mem' to still be valid one wonders if rather than goto cleanup, maybe the goto should be some label below removedef and above goto audit... Furthermore whether that should also be wrapped in a virSaveLastError...
restoredevice: if (!mem) { VIR_WARN("Unable to restore host device la goto audit; }
orig_err = virSaveLastError(); if (teardownlabel && qemuSecurityRestoreMemoryLabel(driver, vm, mem) < 0) VIR_WARN("Unable to restore security label on memdev"); virSetError(orig_err); virFreeError(orig_err);
I think cgroup and namespace could use the same label - whether or not you have a VIR_WARN for when if (!mem) is true is your call.
Using the label leaves out the ugliness of (mem && ret < 0) from the cleanup path.
I don't think it is ugly. Frankly, I find the current state with 4 labels unacceptable and much more ugly. I will fix that in a follow up patch. Michal

Some users might want to pass a blockdev or a chardev as a backend for NVDIMM. In fact, this is expected to be the mostly used configuration. Therefore libvirt should allow the device in devices CGroup then. Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/qemu/qemu_cgroup.c | 49 +++++++++++++++++++++++++++++++++++++++++++++++++ src/qemu/qemu_cgroup.h | 4 ++++ src/qemu/qemu_hotplug.c | 10 ++++++++++ 3 files changed, 63 insertions(+) diff --git a/src/qemu/qemu_cgroup.c b/src/qemu/qemu_cgroup.c index 42a47a798..36762d4f6 100644 --- a/src/qemu/qemu_cgroup.c +++ b/src/qemu/qemu_cgroup.c @@ -348,6 +348,50 @@ qemuTeardownHostdevCgroup(virDomainObjPtr vm, } +int +qemuSetupMemoryDevicesCgroup(virDomainObjPtr vm, + virDomainMemoryDefPtr mem) +{ + qemuDomainObjPrivatePtr priv = vm->privateData; + int rv; + + if (mem->model != VIR_DOMAIN_MEMORY_MODEL_NVDIMM) + return 0; + + if (!virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_DEVICES)) + return 0; + + VIR_DEBUG("Setting devices Cgroup for NVDIMM device: %s", mem->nvdimmPath); + rv = virCgroupAllowDevicePath(priv->cgroup, mem->nvdimmPath, + VIR_CGROUP_DEVICE_RW, false); + virDomainAuditCgroupPath(vm, priv->cgroup, "allow", + mem->nvdimmPath, "rw", rv == 0); + + return rv; +} + + +int +qemuTeardownMemoryDevicesCgroup(virDomainObjPtr vm, + virDomainMemoryDefPtr mem) +{ + qemuDomainObjPrivatePtr priv = vm->privateData; + int rv; + + if (mem->model != VIR_DOMAIN_MEMORY_MODEL_NVDIMM) + return 0; + + if (!virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_DEVICES)) + return 0; + + rv = virCgroupDenyDevicePath(priv->cgroup, mem->nvdimmPath, + VIR_CGROUP_DEVICE_RWM, false); + virDomainAuditCgroupPath(vm, priv->cgroup, + "deny", mem->nvdimmPath, "rwm", rv == 0); + return rv; +} + + static int qemuSetupGraphicsCgroup(virDomainObjPtr vm, virDomainGraphicsDefPtr gfx) @@ -647,6 +691,11 @@ qemuSetupDevicesCgroup(virQEMUDriverPtr driver, goto cleanup; } + for (i = 0; i < vm->def->nmems; i++) { + if (qemuSetupMemoryDevicesCgroup(vm, vm->def->mems[i]) < 0) + goto cleanup; + } + for (i = 0; i < vm->def->ngraphics; i++) { if (qemuSetupGraphicsCgroup(vm, vm->def->graphics[i]) < 0) goto cleanup; diff --git a/src/qemu/qemu_cgroup.h b/src/qemu/qemu_cgroup.h index 8ae4a72ab..d016ce29d 100644 --- a/src/qemu/qemu_cgroup.h +++ b/src/qemu/qemu_cgroup.h @@ -43,6 +43,10 @@ int qemuSetupHostdevCgroup(virDomainObjPtr vm, int qemuTeardownHostdevCgroup(virDomainObjPtr vm, virDomainHostdevDefPtr dev) ATTRIBUTE_RETURN_CHECK; +int qemuSetupMemoryDevicesCgroup(virDomainObjPtr vm, + virDomainMemoryDefPtr mem); +int qemuTeardownMemoryDevicesCgroup(virDomainObjPtr vm, + virDomainMemoryDefPtr mem); int qemuSetupRNGCgroup(virDomainObjPtr vm, virDomainRNGDefPtr rng); int qemuTeardownRNGCgroup(virDomainObjPtr vm, diff --git a/src/qemu/qemu_hotplug.c b/src/qemu/qemu_hotplug.c index 7e19d2f82..829b40258 100644 --- a/src/qemu/qemu_hotplug.c +++ b/src/qemu/qemu_hotplug.c @@ -2216,6 +2216,7 @@ qemuDomainAttachMemory(virQEMUDriverPtr driver, const char *backendType; bool objAdded = false; bool teardownlabel = false; + bool teardowncgroup = false; virJSONValuePtr props = NULL; virObjectEventPtr event; int id; @@ -2245,6 +2246,10 @@ qemuDomainAttachMemory(virQEMUDriverPtr driver, priv->qemuCaps, vm->def, mem, NULL, true) < 0) goto cleanup; + if (qemuSetupMemoryDevicesCgroup(vm, mem) < 0) + goto cleanup; + teardowncgroup = true; + if (qemuSecuritySetMemoryLabel(driver, vm, mem) < 0) goto cleanup; teardownlabel = true; @@ -2294,6 +2299,8 @@ qemuDomainAttachMemory(virQEMUDriverPtr driver, virDomainAuditMemory(vm, oldmem, newmem, "update", ret == 0); cleanup: if (mem && ret < 0) { + if (teardowncgroup && qemuTeardownMemoryDevicesCgroup(vm, mem) < 0) + VIR_WARN("Unable to remove memory device cgroup ACL on hotplug fail"); if (teardownlabel && qemuSecurityRestoreMemoryLabel(driver, vm, mem) < 0) VIR_WARN("Unable to restore security label on memdev"); } @@ -3761,6 +3768,9 @@ qemuDomainRemoveMemoryDevice(virQEMUDriverPtr driver, if (qemuSecurityRestoreMemoryLabel(driver, vm, mem) < 0) VIR_WARN("Unable to restore security label on memdev"); + if (qemuTeardownMemoryDevicesCgroup(vm, mem) < 0) + VIR_WARN("Unable to remove memory device cgroup ACL"); + virDomainMemoryDefFree(mem); /* fix the balloon size */ -- 2.11.0

On 03/09/2017 11:06 AM, Michal Privoznik wrote:
Some users might want to pass a blockdev or a chardev as a backend for NVDIMM. In fact, this is expected to be the mostly used configuration. Therefore libvirt should allow the device in devices CGroup then.
Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/qemu/qemu_cgroup.c | 49 +++++++++++++++++++++++++++++++++++++++++++++++++ src/qemu/qemu_cgroup.h | 4 ++++ src/qemu/qemu_hotplug.c | 10 ++++++++++ 3 files changed, 63 insertions(+)
diff --git a/src/qemu/qemu_cgroup.c b/src/qemu/qemu_cgroup.c index 42a47a798..36762d4f6 100644 --- a/src/qemu/qemu_cgroup.c +++ b/src/qemu/qemu_cgroup.c @@ -348,6 +348,50 @@ qemuTeardownHostdevCgroup(virDomainObjPtr vm, }
+int +qemuSetupMemoryDevicesCgroup(virDomainObjPtr vm, + virDomainMemoryDefPtr mem) +{ + qemuDomainObjPrivatePtr priv = vm->privateData; + int rv; + + if (mem->model != VIR_DOMAIN_MEMORY_MODEL_NVDIMM) + return 0; + + if (!virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_DEVICES)) + return 0; + + VIR_DEBUG("Setting devices Cgroup for NVDIMM device: %s", mem->nvdimmPath); + rv = virCgroupAllowDevicePath(priv->cgroup, mem->nvdimmPath, + VIR_CGROUP_DEVICE_RW, false); + virDomainAuditCgroupPath(vm, priv->cgroup, "allow", + mem->nvdimmPath, "rw", rv == 0); + + return rv; +} + + +int +qemuTeardownMemoryDevicesCgroup(virDomainObjPtr vm, + virDomainMemoryDefPtr mem) +{ + qemuDomainObjPrivatePtr priv = vm->privateData; + int rv; + + if (mem->model != VIR_DOMAIN_MEMORY_MODEL_NVDIMM) + return 0; + + if (!virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_DEVICES)) + return 0; + + rv = virCgroupDenyDevicePath(priv->cgroup, mem->nvdimmPath, + VIR_CGROUP_DEVICE_RWM, false); + virDomainAuditCgroupPath(vm, priv->cgroup, + "deny", mem->nvdimmPath, "rwm", rv == 0); + return rv; +} + + static int qemuSetupGraphicsCgroup(virDomainObjPtr vm, virDomainGraphicsDefPtr gfx) @@ -647,6 +691,11 @@ qemuSetupDevicesCgroup(virQEMUDriverPtr driver, goto cleanup; }
+ for (i = 0; i < vm->def->nmems; i++) { + if (qemuSetupMemoryDevicesCgroup(vm, vm->def->mems[i]) < 0) + goto cleanup; + } + for (i = 0; i < vm->def->ngraphics; i++) { if (qemuSetupGraphicsCgroup(vm, vm->def->graphics[i]) < 0) goto cleanup; diff --git a/src/qemu/qemu_cgroup.h b/src/qemu/qemu_cgroup.h index 8ae4a72ab..d016ce29d 100644 --- a/src/qemu/qemu_cgroup.h +++ b/src/qemu/qemu_cgroup.h @@ -43,6 +43,10 @@ int qemuSetupHostdevCgroup(virDomainObjPtr vm, int qemuTeardownHostdevCgroup(virDomainObjPtr vm, virDomainHostdevDefPtr dev) ATTRIBUTE_RETURN_CHECK; +int qemuSetupMemoryDevicesCgroup(virDomainObjPtr vm, + virDomainMemoryDefPtr mem); +int qemuTeardownMemoryDevicesCgroup(virDomainObjPtr vm, + virDomainMemoryDefPtr mem); int qemuSetupRNGCgroup(virDomainObjPtr vm, virDomainRNGDefPtr rng); int qemuTeardownRNGCgroup(virDomainObjPtr vm, diff --git a/src/qemu/qemu_hotplug.c b/src/qemu/qemu_hotplug.c index 7e19d2f82..829b40258 100644 --- a/src/qemu/qemu_hotplug.c +++ b/src/qemu/qemu_hotplug.c @@ -2216,6 +2216,7 @@ qemuDomainAttachMemory(virQEMUDriverPtr driver, const char *backendType; bool objAdded = false; bool teardownlabel = false; + bool teardowncgroup = false; virJSONValuePtr props = NULL; virObjectEventPtr event; int id; @@ -2245,6 +2246,10 @@ qemuDomainAttachMemory(virQEMUDriverPtr driver, priv->qemuCaps, vm->def, mem, NULL, true) < 0) goto cleanup;
+ if (qemuSetupMemoryDevicesCgroup(vm, mem) < 0) + goto cleanup; + teardowncgroup = true; +
This has the same @props leak as Patch15
if (qemuSecuritySetMemoryLabel(driver, vm, mem) < 0) goto cleanup; teardownlabel = true; @@ -2294,6 +2299,8 @@ qemuDomainAttachMemory(virQEMUDriverPtr driver, virDomainAuditMemory(vm, oldmem, newmem, "update", ret == 0); cleanup: if (mem && ret < 0) { + if (teardowncgroup && qemuTeardownMemoryDevicesCgroup(vm, mem) < 0) + VIR_WARN("Unable to remove memory device cgroup ACL on hotplug fail");
FWIW: based on patch15 comments this would move too
if (teardownlabel && qemuSecurityRestoreMemoryLabel(driver, vm, mem) < 0) VIR_WARN("Unable to restore security label on memdev"); } @@ -3761,6 +3768,9 @@ qemuDomainRemoveMemoryDevice(virQEMUDriverPtr driver, if (qemuSecurityRestoreMemoryLabel(driver, vm, mem) < 0) VIR_WARN("Unable to restore security label on memdev");
+ if (qemuTeardownMemoryDevicesCgroup(vm, mem) < 0) + VIR_WARN("Unable to remove memory device cgroup ACL"); +
The order here is different than attach failure path of teardown cgroup, restore label If there's a relationship between them, then order could be important. ACK in principle and assuming you're following patch15 adjustments anyway. John
virDomainMemoryDefFree(mem);
/* fix the balloon size */

Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/qemu/qemu_domain.c | 76 +++++++++++++++++++++++++++++++++++++++++++++++++ src/qemu/qemu_domain.h | 8 ++++++ src/qemu/qemu_hotplug.c | 11 +++++++ 3 files changed, 95 insertions(+) diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c index 495242981..552523617 100644 --- a/src/qemu/qemu_domain.c +++ b/src/qemu/qemu_domain.c @@ -7526,6 +7526,37 @@ qemuDomainSetupAllHostdevs(virQEMUDriverConfigPtr cfg, } +static int +qemuDomainSetupMemory(virQEMUDriverConfigPtr cfg ATTRIBUTE_UNUSED, + virDomainMemoryDefPtr mem, + const char *devPath) +{ + if (mem->model != VIR_DOMAIN_MEMORY_MODEL_NVDIMM) + return 0; + + return qemuDomainCreateDevice(mem->nvdimmPath, devPath, false); +} + + +static int +qemuDomainSetupAllMemories(virQEMUDriverConfigPtr cfg, + virDomainObjPtr vm, + const char *devPath) +{ + size_t i; + + VIR_DEBUG("Setting up memories"); + for (i = 0; i < vm->def->nmems; i++) { + if (qemuDomainSetupMemory(cfg, + vm->def->mems[i], + devPath) < 0) + return -1; + } + VIR_DEBUG("Setup all memories"); + return 0; +} + + static int qemuDomainSetupChardev(virDomainDefPtr def ATTRIBUTE_UNUSED, virDomainChrDefPtr dev, @@ -7770,6 +7801,9 @@ qemuDomainBuildNamespace(virQEMUDriverConfigPtr cfg, if (qemuDomainSetupAllHostdevs(cfg, vm, devPath) < 0) goto cleanup; + if (qemuDomainSetupAllMemories(cfg, vm, devPath) < 0) + goto cleanup; + if (qemuDomainSetupAllChardevs(cfg, vm, devPath) < 0) goto cleanup; @@ -8228,6 +8262,48 @@ qemuDomainNamespaceTeardownHostdev(virQEMUDriverPtr driver, } +int +qemuDomainNamespaceSetupMemory(virQEMUDriverPtr driver, + virDomainObjPtr vm, + virDomainMemoryDefPtr mem) +{ + int ret = -1; + + if (mem->model != VIR_DOMAIN_MEMORY_MODEL_NVDIMM) + return 0; + + if (!qemuDomainNamespaceEnabled(vm, QEMU_DOMAIN_NS_MOUNT)) + return 0; + + if (qemuDomainAttachDeviceMknod(driver, vm, mem->nvdimmPath) < 0) + goto cleanup; + ret = 0; + cleanup: + return ret; +} + + +int +qemuDomainNamespaceTeardownMemory(virQEMUDriverPtr driver, + virDomainObjPtr vm, + virDomainMemoryDefPtr mem) +{ + int ret = -1; + + if (mem->model != VIR_DOMAIN_MEMORY_MODEL_NVDIMM) + return 0; + + if (!qemuDomainNamespaceEnabled(vm, QEMU_DOMAIN_NS_MOUNT)) + return 0; + + if (qemuDomainDetachDeviceUnlink(driver, vm, mem->nvdimmPath) < 0) + goto cleanup; + ret = 0; + cleanup: + return ret; +} + + int qemuDomainNamespaceSetupChardev(virQEMUDriverPtr driver, virDomainObjPtr vm, diff --git a/src/qemu/qemu_domain.h b/src/qemu/qemu_domain.h index 7fa717390..f5b8d3fe7 100644 --- a/src/qemu/qemu_domain.h +++ b/src/qemu/qemu_domain.h @@ -834,6 +834,14 @@ int qemuDomainNamespaceTeardownHostdev(virQEMUDriverPtr driver, virDomainObjPtr vm, virDomainHostdevDefPtr hostdev); +int qemuDomainNamespaceSetupMemory(virQEMUDriverPtr driver, + virDomainObjPtr vm, + virDomainMemoryDefPtr memory); + +int qemuDomainNamespaceTeardownMemory(virQEMUDriverPtr driver, + virDomainObjPtr vm, + virDomainMemoryDefPtr memory); + int qemuDomainNamespaceSetupChardev(virQEMUDriverPtr driver, virDomainObjPtr vm, virDomainChrDefPtr chr); diff --git a/src/qemu/qemu_hotplug.c b/src/qemu/qemu_hotplug.c index 829b40258..392b2fd02 100644 --- a/src/qemu/qemu_hotplug.c +++ b/src/qemu/qemu_hotplug.c @@ -2217,6 +2217,7 @@ qemuDomainAttachMemory(virQEMUDriverPtr driver, bool objAdded = false; bool teardownlabel = false; bool teardowncgroup = false; + bool teardowndevice = false; virJSONValuePtr props = NULL; virObjectEventPtr event; int id; @@ -2246,6 +2247,10 @@ qemuDomainAttachMemory(virQEMUDriverPtr driver, priv->qemuCaps, vm->def, mem, NULL, true) < 0) goto cleanup; + if (qemuDomainNamespaceSetupMemory(driver, vm, mem) < 0) + goto removedef; + teardowndevice = true; + if (qemuSetupMemoryDevicesCgroup(vm, mem) < 0) goto cleanup; teardowncgroup = true; @@ -2303,6 +2308,9 @@ qemuDomainAttachMemory(virQEMUDriverPtr driver, VIR_WARN("Unable to remove memory device cgroup ACL on hotplug fail"); if (teardownlabel && qemuSecurityRestoreMemoryLabel(driver, vm, mem) < 0) VIR_WARN("Unable to restore security label on memdev"); + if (teardowndevice && + qemuDomainNamespaceTeardownMemory(driver, vm, mem) < 0) + VIR_WARN("Unable to remove memory device from /dev"); } virObjectUnref(cfg); @@ -3771,6 +3779,9 @@ qemuDomainRemoveMemoryDevice(virQEMUDriverPtr driver, if (qemuTeardownMemoryDevicesCgroup(vm, mem) < 0) VIR_WARN("Unable to remove memory device cgroup ACL"); + if (qemuDomainNamespaceTeardownMemory(driver, vm, mem) < 0) + VIR_WARN("Unable to remove memory device from /dev"); + virDomainMemoryDefFree(mem); /* fix the balloon size */ -- 2.11.0

On 03/09/2017 11:06 AM, Michal Privoznik wrote:
Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/qemu/qemu_domain.c | 76 +++++++++++++++++++++++++++++++++++++++++++++++++ src/qemu/qemu_domain.h | 8 ++++++ src/qemu/qemu_hotplug.c | 11 +++++++ 3 files changed, 95 insertions(+)
diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c index 495242981..552523617 100644 --- a/src/qemu/qemu_domain.c +++ b/src/qemu/qemu_domain.c @@ -7526,6 +7526,37 @@ qemuDomainSetupAllHostdevs(virQEMUDriverConfigPtr cfg, }
+static int +qemuDomainSetupMemory(virQEMUDriverConfigPtr cfg ATTRIBUTE_UNUSED, + virDomainMemoryDefPtr mem, + const char *devPath) +{ + if (mem->model != VIR_DOMAIN_MEMORY_MODEL_NVDIMM) + return 0; + + return qemuDomainCreateDevice(mem->nvdimmPath, devPath, false); +} + + +static int +qemuDomainSetupAllMemories(virQEMUDriverConfigPtr cfg, + virDomainObjPtr vm, + const char *devPath) +{ + size_t i; + + VIR_DEBUG("Setting up memories"); + for (i = 0; i < vm->def->nmems; i++) { + if (qemuDomainSetupMemory(cfg, + vm->def->mems[i], + devPath) < 0) + return -1; + } + VIR_DEBUG("Setup all memories"); + return 0; +} + + static int qemuDomainSetupChardev(virDomainDefPtr def ATTRIBUTE_UNUSED, virDomainChrDefPtr dev, @@ -7770,6 +7801,9 @@ qemuDomainBuildNamespace(virQEMUDriverConfigPtr cfg, if (qemuDomainSetupAllHostdevs(cfg, vm, devPath) < 0) goto cleanup;
+ if (qemuDomainSetupAllMemories(cfg, vm, devPath) < 0) + goto cleanup; + if (qemuDomainSetupAllChardevs(cfg, vm, devPath) < 0) goto cleanup;
@@ -8228,6 +8262,48 @@ qemuDomainNamespaceTeardownHostdev(virQEMUDriverPtr driver, }
+int +qemuDomainNamespaceSetupMemory(virQEMUDriverPtr driver, + virDomainObjPtr vm, + virDomainMemoryDefPtr mem) +{ + int ret = -1; + + if (mem->model != VIR_DOMAIN_MEMORY_MODEL_NVDIMM) + return 0; + + if (!qemuDomainNamespaceEnabled(vm, QEMU_DOMAIN_NS_MOUNT)) + return 0; + + if (qemuDomainAttachDeviceMknod(driver, vm, mem->nvdimmPath) < 0) + goto cleanup; + ret = 0; + cleanup: + return ret; +} + + +int +qemuDomainNamespaceTeardownMemory(virQEMUDriverPtr driver, + virDomainObjPtr vm, + virDomainMemoryDefPtr mem) +{ + int ret = -1; + + if (mem->model != VIR_DOMAIN_MEMORY_MODEL_NVDIMM) + return 0; + + if (!qemuDomainNamespaceEnabled(vm, QEMU_DOMAIN_NS_MOUNT)) + return 0; + + if (qemuDomainDetachDeviceUnlink(driver, vm, mem->nvdimmPath) < 0) + goto cleanup; + ret = 0; + cleanup: + return ret; +} + + int qemuDomainNamespaceSetupChardev(virQEMUDriverPtr driver, virDomainObjPtr vm, diff --git a/src/qemu/qemu_domain.h b/src/qemu/qemu_domain.h index 7fa717390..f5b8d3fe7 100644 --- a/src/qemu/qemu_domain.h +++ b/src/qemu/qemu_domain.h @@ -834,6 +834,14 @@ int qemuDomainNamespaceTeardownHostdev(virQEMUDriverPtr driver, virDomainObjPtr vm, virDomainHostdevDefPtr hostdev);
+int qemuDomainNamespaceSetupMemory(virQEMUDriverPtr driver, + virDomainObjPtr vm, + virDomainMemoryDefPtr memory); + +int qemuDomainNamespaceTeardownMemory(virQEMUDriverPtr driver, + virDomainObjPtr vm, + virDomainMemoryDefPtr memory); + int qemuDomainNamespaceSetupChardev(virQEMUDriverPtr driver, virDomainObjPtr vm, virDomainChrDefPtr chr); diff --git a/src/qemu/qemu_hotplug.c b/src/qemu/qemu_hotplug.c index 829b40258..392b2fd02 100644 --- a/src/qemu/qemu_hotplug.c +++ b/src/qemu/qemu_hotplug.c @@ -2217,6 +2217,7 @@ qemuDomainAttachMemory(virQEMUDriverPtr driver, bool objAdded = false; bool teardownlabel = false; bool teardowncgroup = false; + bool teardowndevice = false; virJSONValuePtr props = NULL; virObjectEventPtr event; int id; @@ -2246,6 +2247,10 @@ qemuDomainAttachMemory(virQEMUDriverPtr driver, priv->qemuCaps, vm->def, mem, NULL, true) < 0) goto cleanup;
+ if (qemuDomainNamespaceSetupMemory(driver, vm, mem) < 0) + goto removedef;
goto cleanup; or whatever label was created from patch15 The virDomainMemoryInsert has not occurred yet.
+ teardowndevice = true; + if (qemuSetupMemoryDevicesCgroup(vm, mem) < 0) goto cleanup; teardowncgroup = true; @@ -2303,6 +2308,9 @@ qemuDomainAttachMemory(virQEMUDriverPtr driver, VIR_WARN("Unable to remove memory device cgroup ACL on hotplug fail"); if (teardownlabel && qemuSecurityRestoreMemoryLabel(driver, vm, mem) < 0) VIR_WARN("Unable to restore security label on memdev"); + if (teardowndevice && + qemuDomainNamespaceTeardownMemory(driver, vm, mem) < 0) + VIR_WARN("Unable to remove memory device from /dev");
Again here you could have an ordering problem... If setup was A, B, C, then shouldn't teardown be C, B, A? ACK with adjustments already mentioned from patch15/16 John
}
virObjectUnref(cfg); @@ -3771,6 +3779,9 @@ qemuDomainRemoveMemoryDevice(virQEMUDriverPtr driver, if (qemuTeardownMemoryDevicesCgroup(vm, mem) < 0) VIR_WARN("Unable to remove memory device cgroup ACL");
+ if (qemuDomainNamespaceTeardownMemory(driver, vm, mem) < 0) + VIR_WARN("Unable to remove memory device from /dev"); + virDomainMemoryDefFree(mem);
/* fix the balloon size */
participants (2)
-
John Ferlan
-
Michal Privoznik