[libvirt] [PATCH v2 00/14] Introduce NVDIMM support

v2 of: https://www.redhat.com/archives/libvir-list/2017-February/msg01229.html How to test this feature? $ > /tmp/nvdimm $ truncate --size 512M /tmp/nvdimm $ virsh edit $mydom <memory model='nvdimm' access='shared'> <source> <path>/tmp/nvdimm</path> </source> <target> <size unit='MiB'>512</size> <node>0</node> <label> <size unit='KiB'>128</size> </label> </target> <address type='dimm' slot='0'/> </memory> $ virsh start $mydom $ ssh $mydom "echo \"Hello world\" > /dev/pmem0" $ hexdump -C /tmp/nvdimm 00000000 48 65 6c 6c 6f 20 77 6f 72 6c 64 0a 00 00 00 00 |Hello world.....| 00000010 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 |................| * 20000000 Michal Privoznik (14): qemuBuildMemoryBackendStr: Reorder args and update comment Introduce NVDIMM memory model qemu: Introduce QEMU_CAPS_DEVICE_NVDIMM qemu: Implement NVDIMM conf: Introduce @access to <memory/> qemu: Implement @access for <memory/> banks qemu: Introduce label-size for NVDIMMs security_dac: Label host side of NVDIMM security_selinux: Label host side of NVDIMM security: Introduce internal APIs for memdev labelling secdrivers: Implement memdev relabel APIs qemu_hotplug: Relabel memdev qemu: Allow nvdimm in devices CGroups qemu: Namespaces for NVDIMM docs/formatdomain.html.in | 79 ++++++++--- docs/schemas/domaincommon.rng | 47 +++++-- src/conf/domain_conf.c | 131 +++++++++++++---- src/conf/domain_conf.h | 5 + src/libvirt_private.syms | 2 + src/qemu/qemu_alias.c | 10 +- src/qemu/qemu_capabilities.c | 9 ++ src/qemu/qemu_capabilities.h | 1 + src/qemu/qemu_cgroup.c | 49 +++++++ src/qemu/qemu_cgroup.h | 4 + src/qemu/qemu_command.c | 155 +++++++++++++++------ src/qemu/qemu_command.h | 16 ++- src/qemu/qemu_domain.c | 105 +++++++++++++- src/qemu/qemu_domain.h | 8 ++ src/qemu/qemu_hotplug.c | 42 +++++- src/qemu/qemu_security.c | 56 ++++++++ src/qemu/qemu_security.h | 8 ++ src/security/security_dac.c | 76 ++++++++++ src/security/security_driver.h | 9 ++ src/security/security_manager.c | 56 ++++++++ src/security/security_manager.h | 7 + src/security/security_nop.c | 19 +++ src/security/security_selinux.c | 69 +++++++++ src/security/security_stack.c | 38 +++++ .../qemuxml2argv-memory-hotplug-nvdimm-access.args | 26 ++++ .../qemuxml2argv-memory-hotplug-nvdimm-access.xml | 56 ++++++++ .../qemuxml2argv-memory-hotplug-nvdimm-label.args | 26 ++++ .../qemuxml2argv-memory-hotplug-nvdimm-label.xml | 59 ++++++++ .../qemuxml2argv-memory-hotplug-nvdimm.args | 26 ++++ .../qemuxml2argv-memory-hotplug-nvdimm.xml | 56 ++++++++ tests/qemuxml2argvtest.c | 8 +- ...qemuxml2xmlout-memory-hotplug-nvdimm-access.xml | 1 + .../qemuxml2xmlout-memory-hotplug-nvdimm-label.xml | 1 + .../qemuxml2xmlout-memory-hotplug-nvdimm.xml | 1 + tests/qemuxml2xmltest.c | 3 + 35 files changed, 1144 insertions(+), 120 deletions(-) create mode 100644 tests/qemuxml2argvdata/qemuxml2argv-memory-hotplug-nvdimm-access.args create mode 100644 tests/qemuxml2argvdata/qemuxml2argv-memory-hotplug-nvdimm-access.xml create mode 100644 tests/qemuxml2argvdata/qemuxml2argv-memory-hotplug-nvdimm-label.args create mode 100644 tests/qemuxml2argvdata/qemuxml2argv-memory-hotplug-nvdimm-label.xml create mode 100644 tests/qemuxml2argvdata/qemuxml2argv-memory-hotplug-nvdimm.args create mode 100644 tests/qemuxml2argvdata/qemuxml2argv-memory-hotplug-nvdimm.xml create mode 120000 tests/qemuxml2xmloutdata/qemuxml2xmlout-memory-hotplug-nvdimm-access.xml create mode 120000 tests/qemuxml2xmloutdata/qemuxml2xmlout-memory-hotplug-nvdimm-label.xml create mode 120000 tests/qemuxml2xmloutdata/qemuxml2xmlout-memory-hotplug-nvdimm.xml -- 2.11.0

Frankly, this function is one big mess. A lot of arguments, complicated behaviour. It's really surprising that arguments were in random order (input and output arguments were mixed together), the documentation was outdated, the description of return values was bogus. Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/qemu/qemu_command.c | 64 +++++++++++++++++++++++++++---------------------- src/qemu/qemu_command.h | 14 +++++------ src/qemu/qemu_hotplug.c | 7 +++--- 3 files changed, 46 insertions(+), 39 deletions(-) diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c index 41eecfd18..7c52712d1 100644 --- a/src/qemu/qemu_command.c +++ b/src/qemu/qemu_command.c @@ -3077,38 +3077,47 @@ qemuBuildControllerDevCommandLine(virCommandPtr cmd, /** * qemuBuildMemoryBackendStr: + * @backendProps: [out] constructed object + * @backendType: [out] type of the backennd used + * @cfg: qemu driver config object + * @qemuCaps: qemu capabilities object + * @def: domain definition object * @size: size of the memory device in kibibytes * @pagesize: size of the requested memory page in KiB, 0 for default * @guestNode: NUMA node in the guest that the memory object will be attached * to, or -1 if NUMA is not used in the guest - * @hostNodes: map of host nodes to alloc the memory in, NULL for default - * @autoNodeset: fallback nodeset in case of automatic numa placement - * @def: domain definition object - * @qemuCaps: qemu capabilities object - * @cfg: qemu driver config object - * @aliasPrefix: prefix string of the alias (to allow for multiple frontents) - * @id: index of the device (to construct the alias) - * @backendStr: returns the object string + * @userNodeset: user requested map of host nodes to alloc the memory on, NULL + * for default + * @autoNodeset: fallback nodeset in case of automatic NUMA placement + * @force: forcibly use one of the backends * - * Formats the configuration string for the memory device backend according - * to the configuration. @pagesize and @hostNodes can be used to override the - * default source configuration, both are optional. + * Creates a configuration object that represents memory backend of given guest + * NUMA node (domain @def and @guestNode) of size @size. @pagesize can be used + * to override configured size of hugepages. Use @userNodeset and @autoNodeset + * to fine tune the placement of the memory on the host NUMA nodes. * - * Returns 0 on success, 1 if only the implicit memory-device-ram with no - * other configuration was used (to detect legacy configurations). Returns - * -1 in case of an error. + * By default, if no memory-backend-* object is necessary to fulfil the guest + * configuration value of 1 is returned. This behaviour can be suppressed by + * setting @force to true in which case 0 would be returned. + * + * Then, if one of the two memory-backend-* should be used, the @qemuCaps is + * consulted to check if qemu does support it. + * + * Returns: 0 on success, + * 1 on success and if there's no need to use memory-backend-* + * -1 on error. */ int -qemuBuildMemoryBackendStr(unsigned long long size, - unsigned long long pagesize, +qemuBuildMemoryBackendStr(virJSONValuePtr *backendProps, + const char **backendType, + virQEMUDriverConfigPtr cfg, + virQEMUCapsPtr qemuCaps, + virDomainDefPtr def, int guestNode, + unsigned long long size, + unsigned long long pagesize, virBitmapPtr userNodeset, virBitmapPtr autoNodeset, - virDomainDefPtr def, - virQEMUCapsPtr qemuCaps, - virQEMUDriverConfigPtr cfg, - const char **backendType, - virJSONValuePtr *backendProps, bool force) { virDomainHugePagePtr master_hugepage = NULL; @@ -3327,9 +3336,9 @@ qemuBuildMemoryCellBackendStr(virDomainDefPtr def, if (virAsprintf(&alias, "ram-node%zu", cell) < 0) goto cleanup; - if ((rc = qemuBuildMemoryBackendStr(memsize, 0, cell, NULL, auto_nodeset, - def, qemuCaps, cfg, &backendType, - &props, false)) < 0) + if ((rc = qemuBuildMemoryBackendStr(&props, &backendType, cfg, qemuCaps, + def, cell, memsize, 0, NULL, + auto_nodeset, false)) < 0) goto cleanup; if (!(*backendStr = virQEMUBuildObjectCommandlineFromJSON(backendType, @@ -3368,10 +3377,9 @@ qemuBuildMemoryDimmBackendStr(virDomainMemoryDefPtr mem, if (virAsprintf(&alias, "mem%s", mem->info.alias) < 0) goto cleanup; - if (qemuBuildMemoryBackendStr(mem->size, mem->pagesize, - mem->targetNode, mem->sourceNodes, auto_nodeset, - def, qemuCaps, cfg, - &backendType, &props, true) < 0) + if (qemuBuildMemoryBackendStr(&props, &backendType, cfg, qemuCaps, def, + mem->targetNode, mem->size, mem->pagesize, + mem->sourceNodes, auto_nodeset, true) < 0) goto cleanup; ret = virQEMUBuildObjectCommandlineFromJSON(backendType, alias, props); diff --git a/src/qemu/qemu_command.h b/src/qemu/qemu_command.h index 69fe84613..efdad77f3 100644 --- a/src/qemu/qemu_command.h +++ b/src/qemu/qemu_command.h @@ -124,16 +124,16 @@ char *qemuBuildControllerDevStr(const virDomainDef *domainDef, virQEMUCapsPtr qemuCaps, int *nusbcontroller); -int qemuBuildMemoryBackendStr(unsigned long long size, - unsigned long long pagesize, +int qemuBuildMemoryBackendStr(virJSONValuePtr *backendProps, + const char **backendType, + virQEMUDriverConfigPtr cfg, + virQEMUCapsPtr qemuCaps, + virDomainDefPtr def, int guestNode, + unsigned long long size, + unsigned long long pagesize, virBitmapPtr userNodeset, virBitmapPtr autoNodeset, - virDomainDefPtr def, - virQEMUCapsPtr qemuCaps, - virQEMUDriverConfigPtr cfg, - const char **backendType, - virJSONValuePtr *backendProps, bool force); char *qemuBuildMemoryDeviceStr(virDomainMemoryDefPtr mem); diff --git a/src/qemu/qemu_hotplug.c b/src/qemu/qemu_hotplug.c index 97fb272f6..c7b8074d6 100644 --- a/src/qemu/qemu_hotplug.c +++ b/src/qemu/qemu_hotplug.c @@ -2216,10 +2216,9 @@ qemuDomainAttachMemory(virQEMUDriverPtr driver, if (!(devstr = qemuBuildMemoryDeviceStr(mem))) goto cleanup; - if (qemuBuildMemoryBackendStr(mem->size, mem->pagesize, - mem->targetNode, mem->sourceNodes, NULL, - vm->def, priv->qemuCaps, cfg, - &backendType, &props, true) < 0) + if (qemuBuildMemoryBackendStr(&props, &backendType, cfg, priv->qemuCaps, + vm->def, mem->targetNode, mem->size, + mem->pagesize, mem->sourceNodes, NULL, true) < 0) goto cleanup; if (virDomainMemoryInsert(vm->def, mem) < 0) { -- 2.11.0

On 02/27/2017 08:19 AM, Michal Privoznik wrote:
Frankly, this function is one big mess. A lot of arguments, complicated behaviour. It's really surprising that arguments were in random order (input and output arguments were mixed together), the documentation was outdated, the description of return values was bogus.
Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/qemu/qemu_command.c | 64 +++++++++++++++++++++++++++---------------------- src/qemu/qemu_command.h | 14 +++++------ src/qemu/qemu_hotplug.c | 7 +++--- 3 files changed, 46 insertions(+), 39 deletions(-)
Why not have qemuBuildMemoryBackendStr take a virDomainMemoryDefPtr? In qemuBuildMemoryCellBackendStr you could create one on the stack (virDomainMemoryDef mem = {0};) filling in memsize and cell appropriately before calling... Probably makes a subsequent patch easier too... Perhaps a multistep process to remove the "extra" args and pass the pointer instead... Then alter the order of the args afterwards. Although I think perhaps the current "force" is misnamed and could be a output too...
diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c index 41eecfd18..7c52712d1 100644 --- a/src/qemu/qemu_command.c +++ b/src/qemu/qemu_command.c @@ -3077,38 +3077,47 @@ qemuBuildControllerDevCommandLine(virCommandPtr cmd,
/** * qemuBuildMemoryBackendStr: + * @backendProps: [out] constructed object + * @backendType: [out] type of the backennd used
s/backennd/backend Why have [out] parameters first and not last? Sure that nasty force flag is at the end now, but that could move. Not that we have a standard for this ~/~ yet...
+ * @cfg: qemu driver config object + * @qemuCaps: qemu capabilities object + * @def: domain definition object * @size: size of the memory device in kibibytes * @pagesize: size of the requested memory page in KiB, 0 for default * @guestNode: NUMA node in the guest that the memory object will be attached * to, or -1 if NUMA is not used in the guest - * @hostNodes: map of host nodes to alloc the memory in, NULL for default - * @autoNodeset: fallback nodeset in case of automatic numa placement - * @def: domain definition object - * @qemuCaps: qemu capabilities object - * @cfg: qemu driver config object - * @aliasPrefix: prefix string of the alias (to allow for multiple frontents) - * @id: index of the device (to construct the alias) - * @backendStr: returns the object string + * @userNodeset: user requested map of host nodes to alloc the memory on, NULL + * for default + * @autoNodeset: fallback nodeset in case of automatic NUMA placement + * @force: forcibly use one of the backends
I see that commit '1c4f3b5' allows the code to change "force" from it's passed value (from false to true)... Really makes things quite ugly... and it seems about to get even uglier. That seemingly would only affect qemuBuildMemoryCellBackendStr since it's the only one passing 'false', but does alter some of the following description.
* - * Formats the configuration string for the memory device backend according - * to the configuration. @pagesize and @hostNodes can be used to override the - * default source configuration, both are optional. + * Creates a configuration object that represents memory backend of given guest + * NUMA node (domain @def and @guestNode) of size @size. @pagesize can be used + * to override configured size of hugepages. Use @userNodeset and @autoNodeset + * to fine tune the placement of the memory on the host NUMA nodes. * - * Returns 0 on success, 1 if only the implicit memory-device-ram with no - * other configuration was used (to detect legacy configurations). Returns - * -1 in case of an error. + * By default, if no memory-backend-* object is necessary to fulfil the guest + * configuration value of 1 is returned. This behaviour can be suppressed by + * setting @force to true in which case 0 would be returned. + * + * Then, if one of the two memory-backend-* should be used, the @qemuCaps is + * consulted to check if qemu does support it. + * + * Returns: 0 on success, + * 1 on success and if there's no need to use memory-backend-* + * -1 on error. */
Perhaps the name 'force' is bad and could be turned into an output value too.... That way it's a 0 or -1 return value and there's a "bool *need_backend;"... I'm sure the logic could be reworked to check if someone cares (e.g. a NULL could be passed) and if they do care, then set the value based on whether or not it's needed. One other NIT with the existing code... there's a check "&& !memAccess" of course that should be (memAccess != VIR_DOMAIN_MEMORY_ACCESS_DEFAULT)... John
int -qemuBuildMemoryBackendStr(unsigned long long size, - unsigned long long pagesize, +qemuBuildMemoryBackendStr(virJSONValuePtr *backendProps, + const char **backendType, + virQEMUDriverConfigPtr cfg, + virQEMUCapsPtr qemuCaps, + virDomainDefPtr def, int guestNode, + unsigned long long size, + unsigned long long pagesize, virBitmapPtr userNodeset, virBitmapPtr autoNodeset, - virDomainDefPtr def, - virQEMUCapsPtr qemuCaps, - virQEMUDriverConfigPtr cfg, - const char **backendType, - virJSONValuePtr *backendProps, bool force) { virDomainHugePagePtr master_hugepage = NULL; @@ -3327,9 +3336,9 @@ qemuBuildMemoryCellBackendStr(virDomainDefPtr def, if (virAsprintf(&alias, "ram-node%zu", cell) < 0) goto cleanup;
- if ((rc = qemuBuildMemoryBackendStr(memsize, 0, cell, NULL, auto_nodeset, - def, qemuCaps, cfg, &backendType, - &props, false)) < 0) + if ((rc = qemuBuildMemoryBackendStr(&props, &backendType, cfg, qemuCaps, + def, cell, memsize, 0, NULL, + auto_nodeset, false)) < 0) goto cleanup;
if (!(*backendStr = virQEMUBuildObjectCommandlineFromJSON(backendType, @@ -3368,10 +3377,9 @@ qemuBuildMemoryDimmBackendStr(virDomainMemoryDefPtr mem, if (virAsprintf(&alias, "mem%s", mem->info.alias) < 0) goto cleanup;
- if (qemuBuildMemoryBackendStr(mem->size, mem->pagesize, - mem->targetNode, mem->sourceNodes, auto_nodeset, - def, qemuCaps, cfg, - &backendType, &props, true) < 0) + if (qemuBuildMemoryBackendStr(&props, &backendType, cfg, qemuCaps, def, + mem->targetNode, mem->size, mem->pagesize, + mem->sourceNodes, auto_nodeset, true) < 0) goto cleanup;
ret = virQEMUBuildObjectCommandlineFromJSON(backendType, alias, props); diff --git a/src/qemu/qemu_command.h b/src/qemu/qemu_command.h index 69fe84613..efdad77f3 100644 --- a/src/qemu/qemu_command.h +++ b/src/qemu/qemu_command.h @@ -124,16 +124,16 @@ char *qemuBuildControllerDevStr(const virDomainDef *domainDef, virQEMUCapsPtr qemuCaps, int *nusbcontroller);
-int qemuBuildMemoryBackendStr(unsigned long long size, - unsigned long long pagesize, +int qemuBuildMemoryBackendStr(virJSONValuePtr *backendProps, + const char **backendType, + virQEMUDriverConfigPtr cfg, + virQEMUCapsPtr qemuCaps, + virDomainDefPtr def, int guestNode, + unsigned long long size, + unsigned long long pagesize, virBitmapPtr userNodeset, virBitmapPtr autoNodeset, - virDomainDefPtr def, - virQEMUCapsPtr qemuCaps, - virQEMUDriverConfigPtr cfg, - const char **backendType, - virJSONValuePtr *backendProps, bool force);
char *qemuBuildMemoryDeviceStr(virDomainMemoryDefPtr mem); diff --git a/src/qemu/qemu_hotplug.c b/src/qemu/qemu_hotplug.c index 97fb272f6..c7b8074d6 100644 --- a/src/qemu/qemu_hotplug.c +++ b/src/qemu/qemu_hotplug.c @@ -2216,10 +2216,9 @@ qemuDomainAttachMemory(virQEMUDriverPtr driver, if (!(devstr = qemuBuildMemoryDeviceStr(mem))) goto cleanup;
- if (qemuBuildMemoryBackendStr(mem->size, mem->pagesize, - mem->targetNode, mem->sourceNodes, NULL, - vm->def, priv->qemuCaps, cfg, - &backendType, &props, true) < 0) + if (qemuBuildMemoryBackendStr(&props, &backendType, cfg, priv->qemuCaps, + vm->def, mem->targetNode, mem->size, + mem->pagesize, mem->sourceNodes, NULL, true) < 0) goto cleanup;
if (virDomainMemoryInsert(vm->def, mem) < 0) {

On 03/07/2017 05:00 PM, John Ferlan wrote:
On 02/27/2017 08:19 AM, Michal Privoznik wrote:
Frankly, this function is one big mess. A lot of arguments, complicated behaviour. It's really surprising that arguments were in random order (input and output arguments were mixed together), the documentation was outdated, the description of return values was bogus.
Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/qemu/qemu_command.c | 64 +++++++++++++++++++++++++++---------------------- src/qemu/qemu_command.h | 14 +++++------ src/qemu/qemu_hotplug.c | 7 +++--- 3 files changed, 46 insertions(+), 39 deletions(-)
Why not have qemuBuildMemoryBackendStr take a virDomainMemoryDefPtr? In qemuBuildMemoryCellBackendStr you could create one on the stack (virDomainMemoryDef mem = {0};) filling in memsize and cell appropriately before calling... Probably makes a subsequent patch easier too...
Sure. Will work on that.
Perhaps a multistep process to remove the "extra" args and pass the pointer instead... Then alter the order of the args afterwards.
Agreed.
Although I think perhaps the current "force" is misnamed and could be a output too...
Not quite sure why we want to do that.
diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c index 41eecfd18..7c52712d1 100644 --- a/src/qemu/qemu_command.c +++ b/src/qemu/qemu_command.c @@ -3077,38 +3077,47 @@ qemuBuildControllerDevCommandLine(virCommandPtr cmd,
/** * qemuBuildMemoryBackendStr: + * @backendProps: [out] constructed object + * @backendType: [out] type of the backennd used
s/backennd/backend
Why have [out] parameters first and not last? Sure that nasty force flag is at the end now, but that could move. Not that we have a standard for this ~/~ yet...
I guess this an old habit from glib world. For instance: http://libvirt.org/git/?p=libvirt-glib.git;a=blob;f=libvirt-gobject/libvirt-...
+ * @cfg: qemu driver config object + * @qemuCaps: qemu capabilities object + * @def: domain definition object * @size: size of the memory device in kibibytes * @pagesize: size of the requested memory page in KiB, 0 for default * @guestNode: NUMA node in the guest that the memory object will be attached * to, or -1 if NUMA is not used in the guest - * @hostNodes: map of host nodes to alloc the memory in, NULL for default - * @autoNodeset: fallback nodeset in case of automatic numa placement - * @def: domain definition object - * @qemuCaps: qemu capabilities object - * @cfg: qemu driver config object - * @aliasPrefix: prefix string of the alias (to allow for multiple frontents) - * @id: index of the device (to construct the alias) - * @backendStr: returns the object string + * @userNodeset: user requested map of host nodes to alloc the memory on, NULL + * for default + * @autoNodeset: fallback nodeset in case of automatic NUMA placement + * @force: forcibly use one of the backends
I see that commit '1c4f3b5' allows the code to change "force" from it's passed value (from false to true)... Really makes things quite ugly... and it seems about to get even uglier.
Yeah, I have a fix for that too.
That seemingly would only affect qemuBuildMemoryCellBackendStr since it's the only one passing 'false', but does alter some of the following description.
* - * Formats the configuration string for the memory device backend according - * to the configuration. @pagesize and @hostNodes can be used to override the - * default source configuration, both are optional. + * Creates a configuration object that represents memory backend of given guest + * NUMA node (domain @def and @guestNode) of size @size. @pagesize can be used + * to override configured size of hugepages. Use @userNodeset and @autoNodeset + * to fine tune the placement of the memory on the host NUMA nodes. * - * Returns 0 on success, 1 if only the implicit memory-device-ram with no - * other configuration was used (to detect legacy configurations). Returns - * -1 in case of an error. + * By default, if no memory-backend-* object is necessary to fulfil the guest + * configuration value of 1 is returned. This behaviour can be suppressed by + * setting @force to true in which case 0 would be returned. + * + * Then, if one of the two memory-backend-* should be used, the @qemuCaps is + * consulted to check if qemu does support it. + * + * Returns: 0 on success, + * 1 on success and if there's no need to use memory-backend-* + * -1 on error. */
Perhaps the name 'force' is bad and could be turned into an output value too.... That way it's a 0 or -1 return value and there's a "bool *need_backend;"... I'm sure the logic could be reworked to check if someone cares (e.g. a NULL could be passed) and if they do care, then set the value based on whether or not it's needed.
Not quite sure why we should do this. Lets save that for a follow up patch.
One other NIT with the existing code... there's a check "&& !memAccess" of course that should be (memAccess != VIR_DOMAIN_MEMORY_ACCESS_DEFAULT)...
Okay, will fix that as well. Michal

NVDIMM is new type of memory introduced into QEMU 2.6. The idea is that we have a Non-Volatile memory module that keeps the data persistent across domain reboots. At the domain XML level, we already have some representation of 'dimm' modules. Long story short, we have <memory/> element that lives under <devices/>. Now, the element even has @model attribute which we can use to introduce new memory type: <memory model='nvdimm'> <source> <path>/tmp/nvdimm</path> </source> <target> <size unit='KiB'>523264</size> <node>0</node> </target> <address type='dimm' slot='0'/> </memory> So far, this is just a XML parser/formatter extension. QEMU driver implementation is in the next commit. For more info on NVDIMM visit the following web page: http://pmem.io/ Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- docs/formatdomain.html.in | 55 ++++++++---- docs/schemas/domaincommon.rng | 32 ++++--- src/conf/domain_conf.c | 97 ++++++++++++++++------ src/conf/domain_conf.h | 2 + src/qemu/qemu_command.c | 6 ++ src/qemu/qemu_domain.c | 5 ++ .../qemuxml2argv-memory-hotplug-nvdimm.xml | 56 +++++++++++++ .../qemuxml2xmlout-memory-hotplug-nvdimm.xml | 1 + tests/qemuxml2xmltest.c | 1 + 9 files changed, 204 insertions(+), 51 deletions(-) create mode 100644 tests/qemuxml2argvdata/qemuxml2argv-memory-hotplug-nvdimm.xml create mode 120000 tests/qemuxml2xmloutdata/qemuxml2xmlout-memory-hotplug-nvdimm.xml diff --git a/docs/formatdomain.html.in b/docs/formatdomain.html.in index 02ce7924c..b76905cdc 100644 --- a/docs/formatdomain.html.in +++ b/docs/formatdomain.html.in @@ -7007,7 +7007,6 @@ qemu-kvm -net nic,model=? /dev/null guests' memory resource needs. Some hypervisors may require NUMA configured for the guest. - <span class="since">Since 1.2.14</span> </p> <p> @@ -7032,6 +7031,15 @@ qemu-kvm -net nic,model=? /dev/null <node>1</node> </target> </memory> + <memory model='nvdimm'> + <source> + <path>/tmp/nvdimm</path> + </source> + <target> + <size unit='KiB'>524288</size> + <node>1</node> + </target> + </memory> </devices> ... </pre> @@ -7039,28 +7047,47 @@ qemu-kvm -net nic,model=? /dev/null <dt><code>model</code></dt> <dd> <p> - Currently only the <code>dimm</code> model is supported in order to - add a virtual DIMM module to the guest. + Provide <code>dimm</code> to add a virtual DIMM module to the guest. + <span class="since">Since 1.2.14</span> + Provide <code>nvdimm</code> model adds a Non-Volatile DIMM + module. <span class="since">Since 3.1.0</span> </p> </dd> <dt><code>source</code></dt> <dd> <p> - The optional source element allows to fine tune the source of the - memory used for the given memory device. If the element is not - provided defaults configured via <code>numatune</code> are used. + For model <code>dimm</code> this element is optional and allows to + fine tune the source of the memory used for the given memory device. + If the element is not provided defaults configured via + <code>numatune</code> are used. If <code>dimm</code> is provided, + then the following optional elements can be provided as well: </p> - <p> - <code>pagesize</code> can optionally be used to override the default - host page size used for backing the memory device. - The configured value must correspond to a page size supported by the - host. - </p> + <dl> + <dt><code>pagesize</code></dt> + <dd> + <p> + This element can optionally be used to override the default + host page size used for backing the memory device. + The configured value must correspond to a page size supported by the + host. + </p> + </dd> + + <dt><code>nodemask</code></dt> + <dd> + <p> + This element can optionally be used to override the default + set of NUMA nodes where the memory would be allocated. + </p> + </dd> + </dl> + <p> - <code>nodemask</code> can optionally be used to override the default - set of NUMA nodes where the memory would be allocated. + For model <code>nvdimm</code> this element is mandatory and has a + single child element <code>path</code> that represents a path + in the host that backs the nvdimm module in the guest. </p> </dd> diff --git a/docs/schemas/domaincommon.rng b/docs/schemas/domaincommon.rng index c64544ac4..fafd3e982 100644 --- a/docs/schemas/domaincommon.rng +++ b/docs/schemas/domaincommon.rng @@ -4737,6 +4737,7 @@ <attribute name="model"> <choice> <value>dimm</value> + <value>nvdimm</value> </choice> </attribute> <interleave> @@ -4756,18 +4757,27 @@ <define name="memorydev-source"> <element name="source"> - <interleave> - <optional> - <element name="pagesize"> - <ref name="scaledInteger"/> + <choice> + <group> + <interleave> + <optional> + <element name="pagesize"> + <ref name="scaledInteger"/> + </element> + </optional> + <optional> + <element name="nodemask"> + <ref name="cpuset"/> + </element> + </optional> + </interleave> + </group> + <group> + <element name="path"> + <text/> </element> - </optional> - <optional> - <element name="nodemask"> - <ref name="cpuset"/> - </element> - </optional> - </interleave> + </group> + </choice> </element> </define> diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c index 97d42fe99..4ffca7dc8 100644 --- a/src/conf/domain_conf.c +++ b/src/conf/domain_conf.c @@ -858,8 +858,11 @@ VIR_ENUM_DECL(virDomainBlockJob) VIR_ENUM_IMPL(virDomainBlockJob, VIR_DOMAIN_BLOCK_JOB_TYPE_LAST, "", "", "copy", "", "active-commit") -VIR_ENUM_IMPL(virDomainMemoryModel, VIR_DOMAIN_MEMORY_MODEL_LAST, - "", "dimm") +VIR_ENUM_IMPL(virDomainMemoryModel, + VIR_DOMAIN_MEMORY_MODEL_LAST, + "", + "dimm", + "nvdimm") VIR_ENUM_IMPL(virDomainShmemModel, VIR_DOMAIN_SHMEM_MODEL_LAST, "ivshmem", @@ -2418,6 +2421,7 @@ void virDomainMemoryDefFree(virDomainMemoryDefPtr def) if (!def) return; + VIR_FREE(def->path); virBitmapFree(def->sourceNodes); virDomainDeviceInfoClear(&def->info); VIR_FREE(def); @@ -13755,20 +13759,36 @@ virDomainMemorySourceDefParseXML(xmlNodePtr node, xmlNodePtr save = ctxt->node; ctxt->node = node; - if (virDomainParseMemory("./pagesize", "./pagesize/@unit", ctxt, - &def->pagesize, false, false) < 0) - goto cleanup; - - if ((nodemask = virXPathString("string(./nodemask)", ctxt))) { - if (virBitmapParse(nodemask, &def->sourceNodes, - VIR_DOMAIN_CPUMASK_LEN) < 0) + switch ((virDomainMemoryModel) def->model) { + case VIR_DOMAIN_MEMORY_MODEL_DIMM: + if (virDomainParseMemory("./pagesize", "./pagesize/@unit", ctxt, + &def->pagesize, false, false) < 0) goto cleanup; - if (virBitmapIsAllClear(def->sourceNodes)) { - virReportError(VIR_ERR_CONFIG_UNSUPPORTED, - _("Invalid value of 'nodemask': %s"), nodemask); + if ((nodemask = virXPathString("string(./nodemask)", ctxt))) { + if (virBitmapParse(nodemask, &def->sourceNodes, + VIR_DOMAIN_CPUMASK_LEN) < 0) + goto cleanup; + + if (virBitmapIsAllClear(def->sourceNodes)) { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, + _("Invalid value of 'nodemask': %s"), nodemask); + goto cleanup; + } + } + break; + + case VIR_DOMAIN_MEMORY_MODEL_NVDIMM: + if (!(def->path = virXPathString("string(./path)", ctxt))) { + virReportError(VIR_ERR_XML_DETAIL, "%s", + _("path is required for model nvdimm'")); goto cleanup; } + break; + + case VIR_DOMAIN_MEMORY_MODEL_NONE: + case VIR_DOMAIN_MEMORY_MODEL_LAST: + break; } ret = 0; @@ -15173,12 +15193,25 @@ virDomainMemoryFindByDefInternal(virDomainDefPtr def, tmp->size != mem->size) continue; - /* source stuff -> match with device */ - if (tmp->pagesize != mem->pagesize) - continue; + switch ((virDomainMemoryModel) mem->model) { + case VIR_DOMAIN_MEMORY_MODEL_DIMM: + /* source stuff -> match with device */ + if (tmp->pagesize != mem->pagesize) + continue; - if (!virBitmapEqual(tmp->sourceNodes, mem->sourceNodes)) - continue; + if (!virBitmapEqual(tmp->sourceNodes, mem->sourceNodes)) + continue; + break; + + case VIR_DOMAIN_MEMORY_MODEL_NVDIMM: + if (STRNEQ(tmp->path, mem->path)) + continue; + break; + + case VIR_DOMAIN_MEMORY_MODEL_NONE: + case VIR_DOMAIN_MEMORY_MODEL_LAST: + break; + } break; } @@ -22571,23 +22604,35 @@ virDomainMemorySourceDefFormat(virBufferPtr buf, char *bitmap = NULL; int ret = -1; - if (!def->pagesize && !def->sourceNodes) + if (!def->pagesize && !def->sourceNodes && !def->path) return 0; virBufferAddLit(buf, "<source>\n"); virBufferAdjustIndent(buf, 2); - if (def->sourceNodes) { - if (!(bitmap = virBitmapFormat(def->sourceNodes))) - goto cleanup; + switch ((virDomainMemoryModel) def->model) { + case VIR_DOMAIN_MEMORY_MODEL_DIMM: + if (def->sourceNodes) { + if (!(bitmap = virBitmapFormat(def->sourceNodes))) + goto cleanup; - virBufferAsprintf(buf, "<nodemask>%s</nodemask>\n", bitmap); + virBufferAsprintf(buf, "<nodemask>%s</nodemask>\n", bitmap); + } + + if (def->pagesize) + virBufferAsprintf(buf, "<pagesize unit='KiB'>%llu</pagesize>\n", + def->pagesize); + break; + + case VIR_DOMAIN_MEMORY_MODEL_NVDIMM: + virBufferAsprintf(buf, "<path>%s</path>\n", def->path); + break; + + case VIR_DOMAIN_MEMORY_MODEL_NONE: + case VIR_DOMAIN_MEMORY_MODEL_LAST: + break; } - if (def->pagesize) - virBufferAsprintf(buf, "<pagesize unit='KiB'>%llu</pagesize>\n", - def->pagesize); - virBufferAdjustIndent(buf, -2); virBufferAddLit(buf, "</source>\n"); diff --git a/src/conf/domain_conf.h b/src/conf/domain_conf.h index 1e53cc328..dc949d3c9 100644 --- a/src/conf/domain_conf.h +++ b/src/conf/domain_conf.h @@ -1996,6 +1996,7 @@ struct _virDomainRNGDef { typedef enum { VIR_DOMAIN_MEMORY_MODEL_NONE, VIR_DOMAIN_MEMORY_MODEL_DIMM, /* dimm hotpluggable memory device */ + VIR_DOMAIN_MEMORY_MODEL_NVDIMM, /* nvdimm memory device */ VIR_DOMAIN_MEMORY_MODEL_LAST } virDomainMemoryModel; @@ -2004,6 +2005,7 @@ struct _virDomainMemoryDef { /* source */ virBitmapPtr sourceNodes; unsigned long long pagesize; /* kibibytes */ + char *path; /* target */ int model; /* virDomainMemoryModel */ diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c index 7c52712d1..f628a9929 100644 --- a/src/qemu/qemu_command.c +++ b/src/qemu/qemu_command.c @@ -3421,6 +3421,12 @@ qemuBuildMemoryDeviceStr(virDomainMemoryDefPtr mem) break; + case VIR_DOMAIN_MEMORY_MODEL_NVDIMM: + virReportError(VIR_ERR_NO_SUPPORT, "%s", + _("nvdimm not supported yet")); + return NULL; + break; + case VIR_DOMAIN_MEMORY_MODEL_NONE: case VIR_DOMAIN_MEMORY_MODEL_LAST: break; diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c index c187214dc..5ec610564 100644 --- a/src/qemu/qemu_domain.c +++ b/src/qemu/qemu_domain.c @@ -5910,6 +5910,11 @@ qemuDomainDefValidateMemoryHotplugDevice(const virDomainMemoryDef *mem, } break; + case VIR_DOMAIN_MEMORY_MODEL_NVDIMM: + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", + _("nvdimm hotplug not supported yet")); + return -1; + case VIR_DOMAIN_MEMORY_MODEL_NONE: case VIR_DOMAIN_MEMORY_MODEL_LAST: return -1; diff --git a/tests/qemuxml2argvdata/qemuxml2argv-memory-hotplug-nvdimm.xml b/tests/qemuxml2argvdata/qemuxml2argv-memory-hotplug-nvdimm.xml new file mode 100644 index 000000000..1578db453 --- /dev/null +++ b/tests/qemuxml2argvdata/qemuxml2argv-memory-hotplug-nvdimm.xml @@ -0,0 +1,56 @@ +<domain type='qemu'> + <name>QEMUGuest1</name> + <uuid>c7a5fdbd-edaf-9455-926a-d65c16db1809</uuid> + <maxMemory slots='16' unit='KiB'>1099511627776</maxMemory> + <memory unit='KiB'>1267710</memory> + <currentMemory unit='KiB'>1267710</currentMemory> + <vcpu placement='static' cpuset='0-1'>2</vcpu> + <os> + <type arch='i686' machine='pc'>hvm</type> + <boot dev='hd'/> + </os> + <idmap> + <uid start='0' target='1000' count='10'/> + <gid start='0' target='1000' count='10'/> + </idmap> + <cpu> + <topology sockets='2' cores='1' threads='1'/> + <numa> + <cell id='0' cpus='0-1' memory='1048576' unit='KiB'/> + </numa> + </cpu> + <clock offset='utc'/> + <on_poweroff>destroy</on_poweroff> + <on_reboot>restart</on_reboot> + <on_crash>destroy</on_crash> + <devices> + <emulator>/usr/bin/qemu</emulator> + <disk type='block' device='disk'> + <source dev='/dev/HostVG/QEMUGuest1'/> + <target dev='hda' bus='ide'/> + <address type='drive' controller='0' bus='0' target='0' unit='0'/> + </disk> + <controller type='ide' index='0'> + <address type='pci' domain='0x0000' bus='0x00' slot='0x01' function='0x1'/> + </controller> + <controller type='usb' index='0'> + <address type='pci' domain='0x0000' bus='0x00' slot='0x01' function='0x2'/> + </controller> + <controller type='pci' index='0' model='pci-root'/> + <input type='mouse' bus='ps2'/> + <input type='keyboard' bus='ps2'/> + <memballoon model='virtio'> + <address type='pci' domain='0x0000' bus='0x00' slot='0x03' function='0x0'/> + </memballoon> + <memory model='nvdimm'> + <source> + <path>/tmp/nvdimm</path> + </source> + <target> + <size unit='KiB'>523264</size> + <node>0</node> + </target> + <address type='dimm' slot='0'/> + </memory> + </devices> +</domain> diff --git a/tests/qemuxml2xmloutdata/qemuxml2xmlout-memory-hotplug-nvdimm.xml b/tests/qemuxml2xmloutdata/qemuxml2xmlout-memory-hotplug-nvdimm.xml new file mode 120000 index 000000000..4cac477a9 --- /dev/null +++ b/tests/qemuxml2xmloutdata/qemuxml2xmlout-memory-hotplug-nvdimm.xml @@ -0,0 +1 @@ +../qemuxml2argvdata/qemuxml2argv-memory-hotplug-nvdimm.xml \ No newline at end of file diff --git a/tests/qemuxml2xmltest.c b/tests/qemuxml2xmltest.c index 4353ad245..e1c341dd5 100644 --- a/tests/qemuxml2xmltest.c +++ b/tests/qemuxml2xmltest.c @@ -1078,6 +1078,7 @@ mymain(void) DO_TEST("memory-hotplug", NONE); DO_TEST("memory-hotplug-nonuma", NONE); DO_TEST("memory-hotplug-dimm", NONE); + DO_TEST("memory-hotplug-nvdimm", NONE); DO_TEST("net-udp", NONE); DO_TEST("video-virtio-gpu-device", NONE); -- 2.11.0

On 02/27/2017 08:19 AM, Michal Privoznik wrote:
NVDIMM is new type of memory introduced into QEMU 2.6. The idea is that we have a Non-Volatile memory module that keeps the data persistent across domain reboots.
At the domain XML level, we already have some representation of 'dimm' modules. Long story short, we have <memory/> element that lives under <devices/>. Now, the element even has @model attribute which we can use to introduce new memory type:
Starting with "Long story short..." how about instead: NVDIMM will utilize the existing <memory/> element that lives under <devices/> by adding a new attribute 'nvdimm' to the existing @model and introduce a new <path/> element for <source/> while reusing other fields. The resulting XML would appear as:
<memory model='nvdimm'> <source> <path>/tmp/nvdimm</path> </source> <target> <size unit='KiB'>523264</size> <node>0</node> </target> <address type='dimm' slot='0'/> </memory>
So far, this is just a XML parser/formatter extension. QEMU driver implementation is in the next commit.
For more info on NVDIMM visit the following web page:
Are there "limitations" to the number of nvdimm's that could be supported in a guest? Mostly curious, but it's one of those I thought I read something type, but cannot remember where type things...
Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- docs/formatdomain.html.in | 55 ++++++++---- docs/schemas/domaincommon.rng | 32 ++++--- src/conf/domain_conf.c | 97 ++++++++++++++++------ src/conf/domain_conf.h | 2 + src/qemu/qemu_command.c | 6 ++ src/qemu/qemu_domain.c | 5 ++ .../qemuxml2argv-memory-hotplug-nvdimm.xml | 56 +++++++++++++ .../qemuxml2xmlout-memory-hotplug-nvdimm.xml | 1 + tests/qemuxml2xmltest.c | 1 + 9 files changed, 204 insertions(+), 51 deletions(-) create mode 100644 tests/qemuxml2argvdata/qemuxml2argv-memory-hotplug-nvdimm.xml create mode 120000 tests/qemuxml2xmloutdata/qemuxml2xmlout-memory-hotplug-nvdimm.xml
diff --git a/docs/formatdomain.html.in b/docs/formatdomain.html.in index 02ce7924c..b76905cdc 100644 --- a/docs/formatdomain.html.in +++ b/docs/formatdomain.html.in @@ -7007,7 +7007,6 @@ qemu-kvm -net nic,model=? /dev/null guests' memory resource needs.
Some hypervisors may require NUMA configured for the guest. - <span class="since">Since 1.2.14</span> </p>
<p> @@ -7032,6 +7031,15 @@ qemu-kvm -net nic,model=? /dev/null <node>1</node> </target> </memory> + <memory model='nvdimm'> + <source> + <path>/tmp/nvdimm</path> + </source> + <target> + <size unit='KiB'>524288</size> + <node>1</node> + </target> + </memory> </devices> ... </pre> @@ -7039,28 +7047,47 @@ qemu-kvm -net nic,model=? /dev/null <dt><code>model</code></dt> <dd> <p> - Currently only the <code>dimm</code> model is supported in order to - add a virtual DIMM module to the guest. + Provide <code>dimm</code> to add a virtual DIMM module to the guest. + <span class="since">Since 1.2.14</span> + Provide <code>nvdimm</code> model adds a Non-Volatile DIMM + module. <span class="since">Since 3.1.0</span>
will be at least 3.2.0
</p> </dd>
<dt><code>source</code></dt> <dd> <p> - The optional source element allows to fine tune the source of the - memory used for the given memory device. If the element is not - provided defaults configured via <code>numatune</code> are used. + For model <code>dimm</code> this element is optional and allows to + fine tune the source of the memory used for the given memory device. + If the element is not provided defaults configured via + <code>numatune</code> are used. If <code>dimm</code> is provided, + then the following optional elements can be provided as well: </p> - <p> - <code>pagesize</code> can optionally be used to override the default - host page size used for backing the memory device.
- The configured value must correspond to a page size supported by the - host. - </p> + <dl> + <dt><code>pagesize</code></dt> + <dd> + <p> + This element can optionally be used to override the default
I think at this point optionally is redundant since the above says "the following optional elements":
+ host page size used for backing the memory device. + The configured value must correspond to a page size supported by the + host. + </p> + </dd> + + <dt><code>nodemask</code></dt> + <dd> + <p> + This element can optionally be used to override the default
same w/r/t optionally
+ set of NUMA nodes where the memory would be allocated. + </p> + </dd> + </dl> + <p> - <code>nodemask</code> can optionally be used to override the default - set of NUMA nodes where the memory would be allocated. + For model <code>nvdimm</code> this element is mandatory and has a + single child element <code>path</code> that represents a path + in the host that backs the nvdimm module in the guest. </p> </dd>
diff --git a/docs/schemas/domaincommon.rng b/docs/schemas/domaincommon.rng index c64544ac4..fafd3e982 100644 --- a/docs/schemas/domaincommon.rng +++ b/docs/schemas/domaincommon.rng @@ -4737,6 +4737,7 @@ <attribute name="model"> <choice> <value>dimm</value> + <value>nvdimm</value> </choice> </attribute> <interleave> @@ -4756,18 +4757,27 @@
<define name="memorydev-source"> <element name="source"> - <interleave> - <optional> - <element name="pagesize"> - <ref name="scaledInteger"/> + <choice> + <group> + <interleave> + <optional> + <element name="pagesize"> + <ref name="scaledInteger"/> + </element> + </optional> + <optional> + <element name="nodemask"> + <ref name="cpuset"/> + </element> + </optional> + </interleave> + </group> + <group> + <element name="path"> + <text/>
I would think this should be: <ref name="absFilePath"/>
</element> - </optional> - <optional> - <element name="nodemask"> - <ref name="cpuset"/> - </element> - </optional> - </interleave> + </group> + </choice> </element> </define>
diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c index 97d42fe99..4ffca7dc8 100644 --- a/src/conf/domain_conf.c +++ b/src/conf/domain_conf.c @@ -858,8 +858,11 @@ VIR_ENUM_DECL(virDomainBlockJob) VIR_ENUM_IMPL(virDomainBlockJob, VIR_DOMAIN_BLOCK_JOB_TYPE_LAST, "", "", "copy", "", "active-commit")
-VIR_ENUM_IMPL(virDomainMemoryModel, VIR_DOMAIN_MEMORY_MODEL_LAST, - "", "dimm") +VIR_ENUM_IMPL(virDomainMemoryModel, + VIR_DOMAIN_MEMORY_MODEL_LAST, + "", + "dimm", + "nvdimm")
VIR_ENUM_IMPL(virDomainShmemModel, VIR_DOMAIN_SHMEM_MODEL_LAST, "ivshmem", @@ -2418,6 +2421,7 @@ void virDomainMemoryDefFree(virDomainMemoryDefPtr def) if (!def) return;
+ VIR_FREE(def->path); virBitmapFree(def->sourceNodes); virDomainDeviceInfoClear(&def->info); VIR_FREE(def); @@ -13755,20 +13759,36 @@ virDomainMemorySourceDefParseXML(xmlNodePtr node, xmlNodePtr save = ctxt->node; ctxt->node = node;
- if (virDomainParseMemory("./pagesize", "./pagesize/@unit", ctxt, - &def->pagesize, false, false) < 0) - goto cleanup; - - if ((nodemask = virXPathString("string(./nodemask)", ctxt))) { - if (virBitmapParse(nodemask, &def->sourceNodes, - VIR_DOMAIN_CPUMASK_LEN) < 0) + switch ((virDomainMemoryModel) def->model) { + case VIR_DOMAIN_MEMORY_MODEL_DIMM: + if (virDomainParseMemory("./pagesize", "./pagesize/@unit", ctxt, + &def->pagesize, false, false) < 0) goto cleanup;
- if (virBitmapIsAllClear(def->sourceNodes)) { - virReportError(VIR_ERR_CONFIG_UNSUPPORTED, - _("Invalid value of 'nodemask': %s"), nodemask); + if ((nodemask = virXPathString("string(./nodemask)", ctxt))) { + if (virBitmapParse(nodemask, &def->sourceNodes, + VIR_DOMAIN_CPUMASK_LEN) < 0) + goto cleanup; + + if (virBitmapIsAllClear(def->sourceNodes)) { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, + _("Invalid value of 'nodemask': %s"), nodemask); + goto cleanup; + } + } + break; + + case VIR_DOMAIN_MEMORY_MODEL_NVDIMM: + if (!(def->path = virXPathString("string(./path)", ctxt))) { + virReportError(VIR_ERR_XML_DETAIL, "%s", + _("path is required for model nvdimm'")); goto cleanup; } + break; + + case VIR_DOMAIN_MEMORY_MODEL_NONE: + case VIR_DOMAIN_MEMORY_MODEL_LAST: + break; }
ret = 0; @@ -15173,12 +15193,25 @@ virDomainMemoryFindByDefInternal(virDomainDefPtr def, tmp->size != mem->size) continue;
- /* source stuff -> match with device */ - if (tmp->pagesize != mem->pagesize) - continue; + switch ((virDomainMemoryModel) mem->model) { + case VIR_DOMAIN_MEMORY_MODEL_DIMM: + /* source stuff -> match with device */ + if (tmp->pagesize != mem->pagesize) + continue;
- if (!virBitmapEqual(tmp->sourceNodes, mem->sourceNodes)) - continue; + if (!virBitmapEqual(tmp->sourceNodes, mem->sourceNodes)) + continue; + break; + + case VIR_DOMAIN_MEMORY_MODEL_NVDIMM: + if (STRNEQ(tmp->path, mem->path)) + continue; + break; + + case VIR_DOMAIN_MEMORY_MODEL_NONE: + case VIR_DOMAIN_MEMORY_MODEL_LAST: + break; + }
break; } @@ -22571,23 +22604,35 @@ virDomainMemorySourceDefFormat(virBufferPtr buf, char *bitmap = NULL; int ret = -1;
- if (!def->pagesize && !def->sourceNodes) + if (!def->pagesize && !def->sourceNodes && !def->path) return 0;
virBufferAddLit(buf, "<source>\n"); virBufferAdjustIndent(buf, 2);
- if (def->sourceNodes) { - if (!(bitmap = virBitmapFormat(def->sourceNodes))) - goto cleanup; + switch ((virDomainMemoryModel) def->model) { + case VIR_DOMAIN_MEMORY_MODEL_DIMM: + if (def->sourceNodes) { + if (!(bitmap = virBitmapFormat(def->sourceNodes))) + goto cleanup;
- virBufferAsprintf(buf, "<nodemask>%s</nodemask>\n", bitmap); + virBufferAsprintf(buf, "<nodemask>%s</nodemask>\n", bitmap); + } + + if (def->pagesize) + virBufferAsprintf(buf, "<pagesize unit='KiB'>%llu</pagesize>\n", + def->pagesize); + break; + + case VIR_DOMAIN_MEMORY_MODEL_NVDIMM: + virBufferAsprintf(buf, "<path>%s</path>\n", def->path); + break;
This will need to be escaped properly (any weirdness with ,, too?). FWIW: I'm using 'romfile' (the nvram property) as my reference.
+ + case VIR_DOMAIN_MEMORY_MODEL_NONE: + case VIR_DOMAIN_MEMORY_MODEL_LAST: + break; }
- if (def->pagesize) - virBufferAsprintf(buf, "<pagesize unit='KiB'>%llu</pagesize>\n", - def->pagesize); - virBufferAdjustIndent(buf, -2); virBufferAddLit(buf, "</source>\n");
So no need
diff --git a/src/conf/domain_conf.h b/src/conf/domain_conf.h index 1e53cc328..dc949d3c9 100644 --- a/src/conf/domain_conf.h +++ b/src/conf/domain_conf.h @@ -1996,6 +1996,7 @@ struct _virDomainRNGDef { typedef enum { VIR_DOMAIN_MEMORY_MODEL_NONE, VIR_DOMAIN_MEMORY_MODEL_DIMM, /* dimm hotpluggable memory device */ + VIR_DOMAIN_MEMORY_MODEL_NVDIMM, /* nvdimm memory device */
VIR_DOMAIN_MEMORY_MODEL_LAST } virDomainMemoryModel; @@ -2004,6 +2005,7 @@ struct _virDomainMemoryDef { /* source */ virBitmapPtr sourceNodes; unsigned long long pagesize; /* kibibytes */ + char *path;
Since it's "hard" to find path in sources in a simple (hah) search, can this be nvdimm_path or something more specific? I would also think there'd need to be some Mem ABI Stability check added in virDomainMemoryDefCheckABIStability that the src/dst path's are the same. Searching on 'nmems': 1. Will virDomainDefPostParseMemory need any adjustment to not account for this type of memory or should it be included? 2. Similar for virDomainDefGetMemoryInitial 3. Will alignment be needed? qemuDomainAlignMemorySizes
/* target */ int model; /* virDomainMemoryModel */ diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c index 7c52712d1..f628a9929 100644 --- a/src/qemu/qemu_command.c +++ b/src/qemu/qemu_command.c @@ -3421,6 +3421,12 @@ qemuBuildMemoryDeviceStr(virDomainMemoryDefPtr mem)
break;
+ case VIR_DOMAIN_MEMORY_MODEL_NVDIMM: + virReportError(VIR_ERR_NO_SUPPORT, "%s", + _("nvdimm not supported yet")); + return NULL; + break; + case VIR_DOMAIN_MEMORY_MODEL_NONE: case VIR_DOMAIN_MEMORY_MODEL_LAST: break; diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c index c187214dc..5ec610564 100644 --- a/src/qemu/qemu_domain.c +++ b/src/qemu/qemu_domain.c @@ -5910,6 +5910,11 @@ qemuDomainDefValidateMemoryHotplugDevice(const virDomainMemoryDef *mem, } break;
+ case VIR_DOMAIN_MEMORY_MODEL_NVDIMM: + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", + _("nvdimm hotplug not supported yet")); + return -1; + case VIR_DOMAIN_MEMORY_MODEL_NONE: case VIR_DOMAIN_MEMORY_MODEL_LAST: return -1;
What about migration support? That would assume the file exists in both places or is somehow passed from source to target, right? John
diff --git a/tests/qemuxml2argvdata/qemuxml2argv-memory-hotplug-nvdimm.xml b/tests/qemuxml2argvdata/qemuxml2argv-memory-hotplug-nvdimm.xml new file mode 100644 index 000000000..1578db453 --- /dev/null +++ b/tests/qemuxml2argvdata/qemuxml2argv-memory-hotplug-nvdimm.xml @@ -0,0 +1,56 @@ +<domain type='qemu'> + <name>QEMUGuest1</name> + <uuid>c7a5fdbd-edaf-9455-926a-d65c16db1809</uuid> + <maxMemory slots='16' unit='KiB'>1099511627776</maxMemory> + <memory unit='KiB'>1267710</memory> + <currentMemory unit='KiB'>1267710</currentMemory> + <vcpu placement='static' cpuset='0-1'>2</vcpu> + <os> + <type arch='i686' machine='pc'>hvm</type> + <boot dev='hd'/> + </os> + <idmap> + <uid start='0' target='1000' count='10'/> + <gid start='0' target='1000' count='10'/> + </idmap> + <cpu> + <topology sockets='2' cores='1' threads='1'/> + <numa> + <cell id='0' cpus='0-1' memory='1048576' unit='KiB'/> + </numa> + </cpu> + <clock offset='utc'/> + <on_poweroff>destroy</on_poweroff> + <on_reboot>restart</on_reboot> + <on_crash>destroy</on_crash> + <devices> + <emulator>/usr/bin/qemu</emulator> + <disk type='block' device='disk'> + <source dev='/dev/HostVG/QEMUGuest1'/> + <target dev='hda' bus='ide'/> + <address type='drive' controller='0' bus='0' target='0' unit='0'/> + </disk> + <controller type='ide' index='0'> + <address type='pci' domain='0x0000' bus='0x00' slot='0x01' function='0x1'/> + </controller> + <controller type='usb' index='0'> + <address type='pci' domain='0x0000' bus='0x00' slot='0x01' function='0x2'/> + </controller> + <controller type='pci' index='0' model='pci-root'/> + <input type='mouse' bus='ps2'/> + <input type='keyboard' bus='ps2'/> + <memballoon model='virtio'> + <address type='pci' domain='0x0000' bus='0x00' slot='0x03' function='0x0'/> + </memballoon> + <memory model='nvdimm'> + <source> + <path>/tmp/nvdimm</path> + </source> + <target> + <size unit='KiB'>523264</size> + <node>0</node> + </target> + <address type='dimm' slot='0'/> + </memory> + </devices> +</domain> diff --git a/tests/qemuxml2xmloutdata/qemuxml2xmlout-memory-hotplug-nvdimm.xml b/tests/qemuxml2xmloutdata/qemuxml2xmlout-memory-hotplug-nvdimm.xml new file mode 120000 index 000000000..4cac477a9 --- /dev/null +++ b/tests/qemuxml2xmloutdata/qemuxml2xmlout-memory-hotplug-nvdimm.xml @@ -0,0 +1 @@ +../qemuxml2argvdata/qemuxml2argv-memory-hotplug-nvdimm.xml \ No newline at end of file diff --git a/tests/qemuxml2xmltest.c b/tests/qemuxml2xmltest.c index 4353ad245..e1c341dd5 100644 --- a/tests/qemuxml2xmltest.c +++ b/tests/qemuxml2xmltest.c @@ -1078,6 +1078,7 @@ mymain(void) DO_TEST("memory-hotplug", NONE); DO_TEST("memory-hotplug-nonuma", NONE); DO_TEST("memory-hotplug-dimm", NONE); + DO_TEST("memory-hotplug-nvdimm", NONE); DO_TEST("net-udp", NONE);
DO_TEST("video-virtio-gpu-device", NONE);

[...]
virBufferAdjustIndent(buf, -2); virBufferAddLit(buf, "</source>\n");
So no need
Looks like this lost thought got somehow cut-n-paste below ...
diff --git a/src/conf/domain_conf.h b/src/conf/domain_conf.h index 1e53cc328..dc949d3c9 100644 --- a/src/conf/domain_conf.h +++ b/src/conf/domain_conf.h @@ -1996,6 +1996,7 @@ struct _virDomainRNGDef { typedef enum { VIR_DOMAIN_MEMORY_MODEL_NONE, VIR_DOMAIN_MEMORY_MODEL_DIMM, /* dimm hotpluggable memory device */ + VIR_DOMAIN_MEMORY_MODEL_NVDIMM, /* nvdimm memory device */
VIR_DOMAIN_MEMORY_MODEL_LAST } virDomainMemoryModel; @@ -2004,6 +2005,7 @@ struct _virDomainMemoryDef { /* source */ virBitmapPtr sourceNodes; unsigned long long pagesize; /* kibibytes */ + char *path;
Since it's "hard" to find path in sources in a simple (hah) search, can this be nvdimm_path or something more specific?
As in 'here'...
I would also think there'd need to be some Mem ABI Stability check added in virDomainMemoryDefCheckABIStability that the src/dst path's are the same.
Searching on 'nmems':
1. Will virDomainDefPostParseMemory need any adjustment to not account for this type of memory or should it be included?
2. Similar for virDomainDefGetMemoryInitial
3. Will alignment be needed? qemuDomainAlignMemorySizes
[...] John

On 03/07/2017 05:01 PM, John Ferlan wrote:
On 02/27/2017 08:19 AM, Michal Privoznik wrote:
NVDIMM is new type of memory introduced into QEMU 2.6. The idea is that we have a Non-Volatile memory module that keeps the data persistent across domain reboots.
At the domain XML level, we already have some representation of 'dimm' modules. Long story short, we have <memory/> element that lives under <devices/>. Now, the element even has @model attribute which we can use to introduce new memory type:
Starting with "Long story short..."
how about instead:
NVDIMM will utilize the existing <memory/> element that lives under <devices/> by adding a new attribute 'nvdimm' to the existing @model and introduce a new <path/> element for <source/> while reusing other fields. The resulting XML would appear as:
<memory model='nvdimm'> <source> <path>/tmp/nvdimm</path> </source> <target> <size unit='KiB'>523264</size> <node>0</node> </target> <address type='dimm' slot='0'/> </memory>
So far, this is just a XML parser/formatter extension. QEMU driver implementation is in the next commit.
For more info on NVDIMM visit the following web page:
Are there "limitations" to the number of nvdimm's that could be supported in a guest? Mostly curious, but it's one of those I thought I read something type, but cannot remember where type things...
Not at this level. I mean, there are no limitations in the NVDIM specification. There are some on qemu level, i.e. the minimum for label-size of NVDIMM is 128KiB, and the remaining size (total - label) should be aligned to 4k. But if we are gonna check this we should do so at qemu level, not here. https://www.redhat.com/archives/libvir-list/2016-August/msg00650.html
Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- docs/formatdomain.html.in | 55 ++++++++---- docs/schemas/domaincommon.rng | 32 ++++--- src/conf/domain_conf.c | 97 ++++++++++++++++------ src/conf/domain_conf.h | 2 + src/qemu/qemu_command.c | 6 ++ src/qemu/qemu_domain.c | 5 ++ .../qemuxml2argv-memory-hotplug-nvdimm.xml | 56 +++++++++++++ .../qemuxml2xmlout-memory-hotplug-nvdimm.xml | 1 + tests/qemuxml2xmltest.c | 1 + 9 files changed, 204 insertions(+), 51 deletions(-) create mode 100644 tests/qemuxml2argvdata/qemuxml2argv-memory-hotplug-nvdimm.xml create mode 120000 tests/qemuxml2xmloutdata/qemuxml2xmlout-memory-hotplug-nvdimm.xml
diff --git a/docs/formatdomain.html.in b/docs/formatdomain.html.in index 02ce7924c..b76905cdc 100644 --- a/docs/formatdomain.html.in +++ b/docs/formatdomain.html.in @@ -7007,7 +7007,6 @@ qemu-kvm -net nic,model=? /dev/null guests' memory resource needs.
Some hypervisors may require NUMA configured for the guest. - <span class="since">Since 1.2.14</span> </p>
<p> @@ -7032,6 +7031,15 @@ qemu-kvm -net nic,model=? /dev/null <node>1</node> </target> </memory> + <memory model='nvdimm'> + <source> + <path>/tmp/nvdimm</path> + </source> + <target> + <size unit='KiB'>524288</size> + <node>1</node> + </target> + </memory> </devices> ... </pre> @@ -7039,28 +7047,47 @@ qemu-kvm -net nic,model=? /dev/null <dt><code>model</code></dt> <dd> <p> - Currently only the <code>dimm</code> model is supported in order to - add a virtual DIMM module to the guest. + Provide <code>dimm</code> to add a virtual DIMM module to the guest. + <span class="since">Since 1.2.14</span> + Provide <code>nvdimm</code> model adds a Non-Volatile DIMM + module. <span class="since">Since 3.1.0</span>
will be at least 3.2.0
</p> </dd>
<dt><code>source</code></dt> <dd> <p> - The optional source element allows to fine tune the source of the - memory used for the given memory device. If the element is not - provided defaults configured via <code>numatune</code> are used. + For model <code>dimm</code> this element is optional and allows to + fine tune the source of the memory used for the given memory device. + If the element is not provided defaults configured via + <code>numatune</code> are used. If <code>dimm</code> is provided, + then the following optional elements can be provided as well: </p> - <p> - <code>pagesize</code> can optionally be used to override the default - host page size used for backing the memory device.
- The configured value must correspond to a page size supported by the - host. - </p> + <dl> + <dt><code>pagesize</code></dt> + <dd> + <p> + This element can optionally be used to override the default
I think at this point optionally is redundant since the above says "the following optional elements":
+ host page size used for backing the memory device. + The configured value must correspond to a page size supported by the + host. + </p> + </dd> + + <dt><code>nodemask</code></dt> + <dd> + <p> + This element can optionally be used to override the default
same w/r/t optionally
+ set of NUMA nodes where the memory would be allocated. + </p> + </dd> + </dl> + <p> - <code>nodemask</code> can optionally be used to override the default - set of NUMA nodes where the memory would be allocated. + For model <code>nvdimm</code> this element is mandatory and has a + single child element <code>path</code> that represents a path + in the host that backs the nvdimm module in the guest. </p> </dd>
diff --git a/docs/schemas/domaincommon.rng b/docs/schemas/domaincommon.rng index c64544ac4..fafd3e982 100644 --- a/docs/schemas/domaincommon.rng +++ b/docs/schemas/domaincommon.rng @@ -4737,6 +4737,7 @@ <attribute name="model"> <choice> <value>dimm</value> + <value>nvdimm</value> </choice> </attribute> <interleave> @@ -4756,18 +4757,27 @@
<define name="memorydev-source"> <element name="source"> - <interleave> - <optional> - <element name="pagesize"> - <ref name="scaledInteger"/> + <choice> + <group> + <interleave> + <optional> + <element name="pagesize"> + <ref name="scaledInteger"/> + </element> + </optional> + <optional> + <element name="nodemask"> + <ref name="cpuset"/> + </element> + </optional> + </interleave> + </group> + <group> + <element name="path"> + <text/>
I would think this should be:
<ref name="absFilePath"/>
</element> - </optional> - <optional> - <element name="nodemask"> - <ref name="cpuset"/> - </element> - </optional> - </interleave> + </group> + </choice> </element> </define>
diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c index 97d42fe99..4ffca7dc8 100644 --- a/src/conf/domain_conf.c +++ b/src/conf/domain_conf.c @@ -858,8 +858,11 @@ VIR_ENUM_DECL(virDomainBlockJob) VIR_ENUM_IMPL(virDomainBlockJob, VIR_DOMAIN_BLOCK_JOB_TYPE_LAST, "", "", "copy", "", "active-commit")
-VIR_ENUM_IMPL(virDomainMemoryModel, VIR_DOMAIN_MEMORY_MODEL_LAST, - "", "dimm") +VIR_ENUM_IMPL(virDomainMemoryModel, + VIR_DOMAIN_MEMORY_MODEL_LAST, + "", + "dimm", + "nvdimm")
VIR_ENUM_IMPL(virDomainShmemModel, VIR_DOMAIN_SHMEM_MODEL_LAST, "ivshmem", @@ -2418,6 +2421,7 @@ void virDomainMemoryDefFree(virDomainMemoryDefPtr def) if (!def) return;
+ VIR_FREE(def->path); virBitmapFree(def->sourceNodes); virDomainDeviceInfoClear(&def->info); VIR_FREE(def); @@ -13755,20 +13759,36 @@ virDomainMemorySourceDefParseXML(xmlNodePtr node, xmlNodePtr save = ctxt->node; ctxt->node = node;
- if (virDomainParseMemory("./pagesize", "./pagesize/@unit", ctxt, - &def->pagesize, false, false) < 0) - goto cleanup; - - if ((nodemask = virXPathString("string(./nodemask)", ctxt))) { - if (virBitmapParse(nodemask, &def->sourceNodes, - VIR_DOMAIN_CPUMASK_LEN) < 0) + switch ((virDomainMemoryModel) def->model) { + case VIR_DOMAIN_MEMORY_MODEL_DIMM: + if (virDomainParseMemory("./pagesize", "./pagesize/@unit", ctxt, + &def->pagesize, false, false) < 0) goto cleanup;
- if (virBitmapIsAllClear(def->sourceNodes)) { - virReportError(VIR_ERR_CONFIG_UNSUPPORTED, - _("Invalid value of 'nodemask': %s"), nodemask); + if ((nodemask = virXPathString("string(./nodemask)", ctxt))) { + if (virBitmapParse(nodemask, &def->sourceNodes, + VIR_DOMAIN_CPUMASK_LEN) < 0) + goto cleanup; + + if (virBitmapIsAllClear(def->sourceNodes)) { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, + _("Invalid value of 'nodemask': %s"), nodemask); + goto cleanup; + } + } + break; + + case VIR_DOMAIN_MEMORY_MODEL_NVDIMM: + if (!(def->path = virXPathString("string(./path)", ctxt))) { + virReportError(VIR_ERR_XML_DETAIL, "%s", + _("path is required for model nvdimm'")); goto cleanup; } + break; + + case VIR_DOMAIN_MEMORY_MODEL_NONE: + case VIR_DOMAIN_MEMORY_MODEL_LAST: + break; }
ret = 0; @@ -15173,12 +15193,25 @@ virDomainMemoryFindByDefInternal(virDomainDefPtr def, tmp->size != mem->size) continue;
- /* source stuff -> match with device */ - if (tmp->pagesize != mem->pagesize) - continue; + switch ((virDomainMemoryModel) mem->model) { + case VIR_DOMAIN_MEMORY_MODEL_DIMM: + /* source stuff -> match with device */ + if (tmp->pagesize != mem->pagesize) + continue;
- if (!virBitmapEqual(tmp->sourceNodes, mem->sourceNodes)) - continue; + if (!virBitmapEqual(tmp->sourceNodes, mem->sourceNodes)) + continue; + break; + + case VIR_DOMAIN_MEMORY_MODEL_NVDIMM: + if (STRNEQ(tmp->path, mem->path)) + continue; + break; + + case VIR_DOMAIN_MEMORY_MODEL_NONE: + case VIR_DOMAIN_MEMORY_MODEL_LAST: + break; + }
break; } @@ -22571,23 +22604,35 @@ virDomainMemorySourceDefFormat(virBufferPtr buf, char *bitmap = NULL; int ret = -1;
- if (!def->pagesize && !def->sourceNodes) + if (!def->pagesize && !def->sourceNodes && !def->path) return 0;
virBufferAddLit(buf, "<source>\n"); virBufferAdjustIndent(buf, 2);
- if (def->sourceNodes) { - if (!(bitmap = virBitmapFormat(def->sourceNodes))) - goto cleanup; + switch ((virDomainMemoryModel) def->model) { + case VIR_DOMAIN_MEMORY_MODEL_DIMM: + if (def->sourceNodes) { + if (!(bitmap = virBitmapFormat(def->sourceNodes))) + goto cleanup;
- virBufferAsprintf(buf, "<nodemask>%s</nodemask>\n", bitmap); + virBufferAsprintf(buf, "<nodemask>%s</nodemask>\n", bitmap); + } + + if (def->pagesize) + virBufferAsprintf(buf, "<pagesize unit='KiB'>%llu</pagesize>\n", + def->pagesize); + break; + + case VIR_DOMAIN_MEMORY_MODEL_NVDIMM: + virBufferAsprintf(buf, "<path>%s</path>\n", def->path); + break;
This will need to be escaped properly (any weirdness with ,, too?). FWIW: I'm using 'romfile' (the nvram property) as my reference.
+ + case VIR_DOMAIN_MEMORY_MODEL_NONE: + case VIR_DOMAIN_MEMORY_MODEL_LAST: + break; }
- if (def->pagesize) - virBufferAsprintf(buf, "<pagesize unit='KiB'>%llu</pagesize>\n", - def->pagesize); - virBufferAdjustIndent(buf, -2); virBufferAddLit(buf, "</source>\n");
So no need
diff --git a/src/conf/domain_conf.h b/src/conf/domain_conf.h index 1e53cc328..dc949d3c9 100644 --- a/src/conf/domain_conf.h +++ b/src/conf/domain_conf.h @@ -1996,6 +1996,7 @@ struct _virDomainRNGDef { typedef enum { VIR_DOMAIN_MEMORY_MODEL_NONE, VIR_DOMAIN_MEMORY_MODEL_DIMM, /* dimm hotpluggable memory device */ + VIR_DOMAIN_MEMORY_MODEL_NVDIMM, /* nvdimm memory device */
VIR_DOMAIN_MEMORY_MODEL_LAST } virDomainMemoryModel; @@ -2004,6 +2005,7 @@ struct _virDomainMemoryDef { /* source */ virBitmapPtr sourceNodes; unsigned long long pagesize; /* kibibytes */ + char *path;
Since it's "hard" to find path in sources in a simple (hah) search, can this be nvdimm_path or something more specific?
Okay, nvdimmPath it is.
I would also think there'd need to be some Mem ABI Stability check added in virDomainMemoryDefCheckABIStability that the src/dst path's are the same.
No. This is not needed. The contents of NVDIMM should be synced upon migration. Just like NVRAM for UEFI is. Therefore, on migration the backing file for NVDIMM can change and qemu should cope with it just fine.
Searching on 'nmems':
1. Will virDomainDefPostParseMemory need any adjustment to not account for this type of memory or should it be included?
It should be included. This is one of the nice benefits of this feature - it is a DIMM module after all. Therefore you have to have enough memory slots available, the size of NVDIMM module is accounted in the overal memory size of the guest, and so on.
2. Similar for virDomainDefGetMemoryInitial
Again. We want to account NVDIMM here. This is correct.
3. Will alignment be needed? qemuDomainAlignMemorySizes
Yes. That's why the code is dimm type agnostic.
/* target */ int model; /* virDomainMemoryModel */ diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c index 7c52712d1..f628a9929 100644 --- a/src/qemu/qemu_command.c +++ b/src/qemu/qemu_command.c @@ -3421,6 +3421,12 @@ qemuBuildMemoryDeviceStr(virDomainMemoryDefPtr mem)
break;
+ case VIR_DOMAIN_MEMORY_MODEL_NVDIMM: + virReportError(VIR_ERR_NO_SUPPORT, "%s", + _("nvdimm not supported yet")); + return NULL; + break; + case VIR_DOMAIN_MEMORY_MODEL_NONE: case VIR_DOMAIN_MEMORY_MODEL_LAST: break; diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c index c187214dc..5ec610564 100644 --- a/src/qemu/qemu_domain.c +++ b/src/qemu/qemu_domain.c @@ -5910,6 +5910,11 @@ qemuDomainDefValidateMemoryHotplugDevice(const virDomainMemoryDef *mem, } break;
+ case VIR_DOMAIN_MEMORY_MODEL_NVDIMM: + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", + _("nvdimm hotplug not supported yet")); + return -1; + case VIR_DOMAIN_MEMORY_MODEL_NONE: case VIR_DOMAIN_MEMORY_MODEL_LAST: return -1;
What about migration support? That would assume the file exists in both places or is somehow passed from source to target, right?
It should be enough that the file exists on the destination (regardless of its initial content) and qemu overwrites its content during migration. Libvirt does not pre-create the file on migration, just like it doesn't pre-create it on domain startup. Michal

Introduce a qemu capability for -device nvdimm. Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/qemu/qemu_capabilities.c | 9 +++++++++ src/qemu/qemu_capabilities.h | 1 + 2 files changed, 10 insertions(+) diff --git a/src/qemu/qemu_capabilities.c b/src/qemu/qemu_capabilities.c index 6f60a00ef..a9a2705a6 100644 --- a/src/qemu/qemu_capabilities.c +++ b/src/qemu/qemu_capabilities.c @@ -358,6 +358,7 @@ VIR_ENUM_IMPL(virQEMUCaps, QEMU_CAPS_LAST, "query-cpu-model-expansion", /* 245 */ "virtio-net.host_mtu", "spice-rendernode", + "nvdimm", ); @@ -1625,6 +1626,7 @@ struct virQEMUCapsStringFlags virQEMUCapsObjectTypes[] = { { "ivshmem-plain", QEMU_CAPS_DEVICE_IVSHMEM_PLAIN }, { "ivshmem-doorbell", QEMU_CAPS_DEVICE_IVSHMEM_DOORBELL }, { "vhost-scsi", QEMU_CAPS_DEVICE_VHOST_SCSI }, + { "nvdimm", QEMU_CAPS_DEVICE_NVDIMM }, }; static struct virQEMUCapsStringFlags virQEMUCapsObjectPropsVirtioBalloon[] = { @@ -4358,6 +4360,13 @@ virQEMUCapsInitQMPMonitor(virQEMUCapsPtr qemuCaps, virQEMUCapsProbeQMPGICCapabilities(qemuCaps, mon) < 0) goto cleanup; + /* Prealloc on NVDIMMs is broken on older QEMUs leading to + * user data corruption. If we are dealing with such version + * of QEMU pretend we don't know how to NVDIMM. */ + if (qemuCaps->version < 2009000 && + virQEMUCapsGet(qemuCaps, QEMU_CAPS_DEVICE_NVDIMM)) + virQEMUCapsClear(qemuCaps, QEMU_CAPS_DEVICE_NVDIMM); + ret = 0; cleanup: return ret; diff --git a/src/qemu/qemu_capabilities.h b/src/qemu/qemu_capabilities.h index 0f998c473..01c4a80e6 100644 --- a/src/qemu/qemu_capabilities.h +++ b/src/qemu/qemu_capabilities.h @@ -394,6 +394,7 @@ typedef enum { QEMU_CAPS_QUERY_CPU_MODEL_EXPANSION, /* qmp query-cpu-model-expansion */ QEMU_CAPS_VIRTIO_NET_HOST_MTU, /* virtio-net-*.host_mtu */ QEMU_CAPS_SPICE_RENDERNODE, /* -spice rendernode */ + QEMU_CAPS_DEVICE_NVDIMM, /* -device nvdimm */ QEMU_CAPS_LAST /* this must always be the last item */ } virQEMUCapsFlags; -- 2.11.0

On 02/27/2017 08:19 AM, Michal Privoznik wrote:
Introduce a qemu capability for -device nvdimm.
Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/qemu/qemu_capabilities.c | 9 +++++++++ src/qemu/qemu_capabilities.h | 1 + 2 files changed, 10 insertions(+)
So this doesn't require tests/qemu*/caps_2.*.xml changes to add a: <flag name='nvdimm'/> ? ACK in general though... ugly for the version check, but not much that can be done I suppose. John

On 03/07/2017 05:01 PM, John Ferlan wrote:
On 02/27/2017 08:19 AM, Michal Privoznik wrote:
Introduce a qemu capability for -device nvdimm.
Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/qemu/qemu_capabilities.c | 9 +++++++++ src/qemu/qemu_capabilities.h | 1 + 2 files changed, 10 insertions(+)
So this doesn't require tests/qemu*/caps_2.*.xml changes to add a:
<flag name='nvdimm'/> ?
No. It doesn't. Basically, we need the version check because we are waiting for Dan's patch to get in: http://lists.nongnu.org/archive/html/qemu-devel/2017-03/msg00636.html There will be no way for us to detect that. I mean, other than version check. It sucks still, I agree on that. Therefore, even though some older qemus do support nvdims, libvirt claims no support because they are lacking the Dan's fix. It was discussed somewhere on the list, but I don't remember where from the top of my head - perhaps in review of v1? Michal

So, majority of the code is just ready as-is. Well, with one slight change: differentiate between dimm and nvdimm in places like device alias generation, generating the command line and so on. Speaking of the command line, we also need to append 'nvdimm=on' to the '-machine' argument so that the nvdimm feature is advertised in the ACPI tables properly. Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/qemu/qemu_alias.c | 10 ++- src/qemu/qemu_command.c | 91 +++++++++++++++------- src/qemu/qemu_command.h | 1 + src/qemu/qemu_domain.c | 34 +++++--- src/qemu/qemu_hotplug.c | 3 +- .../qemuxml2argv-memory-hotplug-nvdimm.args | 26 +++++++ tests/qemuxml2argvtest.c | 4 +- 7 files changed, 128 insertions(+), 41 deletions(-) create mode 100644 tests/qemuxml2argvdata/qemuxml2argv-memory-hotplug-nvdimm.args diff --git a/src/qemu/qemu_alias.c b/src/qemu/qemu_alias.c index 4cccf231e..05e1293ef 100644 --- a/src/qemu/qemu_alias.c +++ b/src/qemu/qemu_alias.c @@ -352,17 +352,23 @@ qemuAssignDeviceMemoryAlias(virDomainDefPtr def, size_t i; int maxidx = 0; int idx; + const char *prefix; + + if (mem->model == VIR_DOMAIN_MEMORY_MODEL_DIMM) + prefix = "dimm"; + else + prefix = "nvdimm"; if (oldAlias) { for (i = 0; i < def->nmems; i++) { - if ((idx = qemuDomainDeviceAliasIndex(&def->mems[i]->info, "dimm")) >= maxidx) + if ((idx = qemuDomainDeviceAliasIndex(&def->mems[i]->info, prefix)) >= maxidx) maxidx = idx + 1; } } else { maxidx = mem->info.addr.dimm.slot; } - if (virAsprintf(&mem->info.alias, "dimm%d", maxidx) < 0) + if (virAsprintf(&mem->info.alias, "%s%d", prefix, maxidx) < 0) return -1; return 0; diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c index f628a9929..46a6ca9f0 100644 --- a/src/qemu/qemu_command.c +++ b/src/qemu/qemu_command.c @@ -3089,6 +3089,7 @@ qemuBuildControllerDevCommandLine(virCommandPtr cmd, * @userNodeset: user requested map of host nodes to alloc the memory on, NULL * for default * @autoNodeset: fallback nodeset in case of automatic NUMA placement + * @memPathReq: request memory-backend-file with specific mem-path * @force: forcibly use one of the backends * * Creates a configuration object that represents memory backend of given guest @@ -3103,6 +3104,8 @@ qemuBuildControllerDevCommandLine(virCommandPtr cmd, * Then, if one of the two memory-backend-* should be used, the @qemuCaps is * consulted to check if qemu does support it. * + * If @memPathReq is non-NULL, memory-backend-file is used with passed path. + * * Returns: 0 on success, * 1 on success and if there's no need to use memory-backend-* * -1 on error. @@ -3118,6 +3121,7 @@ qemuBuildMemoryBackendStr(virJSONValuePtr *backendProps, unsigned long long pagesize, virBitmapPtr userNodeset, virBitmapPtr autoNodeset, + const char *memPathReq, bool force) { virDomainHugePagePtr master_hugepage = NULL; @@ -3126,7 +3130,8 @@ qemuBuildMemoryBackendStr(virJSONValuePtr *backendProps, const long system_page_size = virGetSystemPageSizeKB(); virDomainMemoryAccess memAccess = VIR_DOMAIN_MEMORY_ACCESS_DEFAULT; size_t i; - char *mem_path = NULL; + char *memPathActual = NULL; + bool prealloc = false; virBitmapPtr nodemask = NULL; int ret = -1; virJSONValuePtr props = NULL; @@ -3206,27 +3211,36 @@ qemuBuildMemoryBackendStr(virJSONValuePtr *backendProps, if (!(props = virJSONValueNewObject())) return -1; - if (pagesize || def->mem.source == VIR_DOMAIN_MEMORY_SOURCE_FILE) { + if (pagesize || memPathReq || + def->mem.source == VIR_DOMAIN_MEMORY_SOURCE_FILE) { *backendType = "memory-backend-file"; - if (def->mem.source == VIR_DOMAIN_MEMORY_SOURCE_FILE) { + if (memPathReq) { + if (VIR_STRDUP(memPathActual, memPathReq) < 0) + goto cleanup; + prealloc = true; + } else if (def->mem.source == VIR_DOMAIN_MEMORY_SOURCE_FILE) { /* we can have both pagesize and mem source, then check mem source first */ + if (VIR_STRDUP(memPathActual, cfg->memoryBackingDir) < 0) + goto cleanup; force = true; - if (virJSONValueObjectAdd(props, - "s:mem-path", cfg->memoryBackingDir, - NULL) < 0) - goto cleanup; } else { - if (qemuGetDomainHupageMemPath(def, cfg, pagesize, &mem_path) < 0) - goto cleanup; - - if (virJSONValueObjectAdd(props, - "b:prealloc", true, - "s:mem-path", mem_path, - NULL) < 0) + if (qemuGetDomainHupageMemPath(def, cfg, pagesize, &memPathActual) < 0) goto cleanup; + prealloc = true; } + if (prealloc && + virJSONValueObjectAdd(props, + "b:prealloc", true, + NULL) < 0) + goto cleanup; + + if (virJSONValueObjectAdd(props, + "s:mem-path", memPathActual, + NULL) < 0) + goto cleanup; + switch (memAccess) { case VIR_DOMAIN_MEMORY_ACCESS_SHARED: if (virJSONValueObjectAdd(props, "b:share", true, NULL) < 0) @@ -3281,7 +3295,8 @@ qemuBuildMemoryBackendStr(virJSONValuePtr *backendProps, } /* If none of the following is requested... */ - if (!needHugepage && !userNodeset && !memAccess && !nodeSpecified && !force) { + if (!needHugepage && !userNodeset && + !memAccess && !nodeSpecified && !force && !memPathReq) { /* report back that using the new backend is not necessary * to achieve the desired configuration */ ret = 1; @@ -3309,8 +3324,7 @@ qemuBuildMemoryBackendStr(virJSONValuePtr *backendProps, cleanup: virJSONValueFree(props); - VIR_FREE(mem_path); - + VIR_FREE(memPathActual); return ret; } @@ -3338,7 +3352,7 @@ qemuBuildMemoryCellBackendStr(virDomainDefPtr def, if ((rc = qemuBuildMemoryBackendStr(&props, &backendType, cfg, qemuCaps, def, cell, memsize, 0, NULL, - auto_nodeset, false)) < 0) + auto_nodeset, NULL, false)) < 0) goto cleanup; if (!(*backendStr = virQEMUBuildObjectCommandlineFromJSON(backendType, @@ -3379,7 +3393,8 @@ qemuBuildMemoryDimmBackendStr(virDomainMemoryDefPtr mem, if (qemuBuildMemoryBackendStr(&props, &backendType, cfg, qemuCaps, def, mem->targetNode, mem->size, mem->pagesize, - mem->sourceNodes, auto_nodeset, true) < 0) + mem->sourceNodes, auto_nodeset, mem->path, + true) < 0) goto cleanup; ret = virQEMUBuildObjectCommandlineFromJSON(backendType, alias, props); @@ -3396,6 +3411,7 @@ char * qemuBuildMemoryDeviceStr(virDomainMemoryDefPtr mem) { virBuffer buf = VIR_BUFFER_INITIALIZER; + const char *device; if (!mem->info.alias) { virReportError(VIR_ERR_INTERNAL_ERROR, "%s", @@ -3404,8 +3420,15 @@ qemuBuildMemoryDeviceStr(virDomainMemoryDefPtr mem) } switch ((virDomainMemoryModel) mem->model) { + case VIR_DOMAIN_MEMORY_MODEL_NVDIMM: case VIR_DOMAIN_MEMORY_MODEL_DIMM: - virBufferAddLit(&buf, "pc-dimm,"); + + if (mem->model == VIR_DOMAIN_MEMORY_MODEL_DIMM) + device = "pc-dimm"; + else + device = "nvdimm"; + + virBufferAsprintf(&buf, "%s,", device); if (mem->targetNode >= 0) virBufferAsprintf(&buf, "node=%d,", mem->targetNode); @@ -3421,12 +3444,6 @@ qemuBuildMemoryDeviceStr(virDomainMemoryDefPtr mem) break; - case VIR_DOMAIN_MEMORY_MODEL_NVDIMM: - virReportError(VIR_ERR_NO_SUPPORT, "%s", - _("nvdimm not supported yet")); - return NULL; - break; - case VIR_DOMAIN_MEMORY_MODEL_NONE: case VIR_DOMAIN_MEMORY_MODEL_LAST: break; @@ -6976,6 +6993,7 @@ qemuBuildMachineCommandLine(virCommandPtr cmd, { virBuffer buf = VIR_BUFFER_INITIALIZER; bool obsoleteAccel = false; + size_t i; int ret = -1; /* This should *never* be NULL, since we always provide @@ -7012,6 +7030,15 @@ qemuBuildMachineCommandLine(virCommandPtr cmd, "with this QEMU binary")); return -1; } + + for (i = 0; i < def->nmems; i++) { + if (def->mems[i]->model == VIR_DOMAIN_MEMORY_MODEL_NVDIMM) { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", + _("nvdimm not is not available " + "with this QEMU binary")); + return -1; + } + } } else { virTristateSwitch vmport = def->features[VIR_DOMAIN_FEATURE_VMPORT]; virTristateSwitch smm = def->features[VIR_DOMAIN_FEATURE_SMM]; @@ -7132,6 +7159,18 @@ qemuBuildMachineCommandLine(virCommandPtr cmd, } } + for (i = 0; i < def->nmems; i++) { + if (def->mems[i]->model == VIR_DOMAIN_MEMORY_MODEL_NVDIMM) { + if (!virQEMUCapsGet(qemuCaps, QEMU_CAPS_DEVICE_NVDIMM)) { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", + _("nvdimm isn't supported by this QEMU binary")); + goto cleanup; + } + virBufferAddLit(&buf, ",nvdimm=on"); + break; + } + } + virCommandAddArgBuffer(cmd, &buf); } diff --git a/src/qemu/qemu_command.h b/src/qemu/qemu_command.h index efdad77f3..e23930255 100644 --- a/src/qemu/qemu_command.h +++ b/src/qemu/qemu_command.h @@ -134,6 +134,7 @@ int qemuBuildMemoryBackendStr(virJSONValuePtr *backendProps, unsigned long long pagesize, virBitmapPtr userNodeset, virBitmapPtr autoNodeset, + const char *memPath, bool force); char *qemuBuildMemoryDeviceStr(virDomainMemoryDefPtr mem); diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c index 5ec610564..f585e6f25 100644 --- a/src/qemu/qemu_domain.c +++ b/src/qemu/qemu_domain.c @@ -5878,6 +5878,7 @@ qemuDomainDefValidateMemoryHotplugDevice(const virDomainMemoryDef *mem, { switch ((virDomainMemoryModel) mem->model) { case VIR_DOMAIN_MEMORY_MODEL_DIMM: + case VIR_DOMAIN_MEMORY_MODEL_NVDIMM: if (mem->info.type != VIR_DOMAIN_DEVICE_ADDRESS_TYPE_DIMM && mem->info.type != VIR_DOMAIN_DEVICE_ADDRESS_TYPE_NONE) { virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", @@ -5910,11 +5911,6 @@ qemuDomainDefValidateMemoryHotplugDevice(const virDomainMemoryDef *mem, } break; - case VIR_DOMAIN_MEMORY_MODEL_NVDIMM: - virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", - _("nvdimm hotplug not supported yet")); - return -1; - case VIR_DOMAIN_MEMORY_MODEL_NONE: case VIR_DOMAIN_MEMORY_MODEL_LAST: return -1; @@ -5968,12 +5964,6 @@ qemuDomainDefValidateMemoryHotplug(const virDomainDef *def, return 0; } - if (!virQEMUCapsGet(qemuCaps, QEMU_CAPS_DEVICE_PC_DIMM)) { - virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", - _("memory hotplug isn't supported by this QEMU binary")); - return -1; - } - if (!ARCH_IS_PPC64(def->os.arch)) { /* due to guest support, qemu would silently enable NUMA with one node * once the memory hotplug backend is enabled. To avoid possible @@ -5997,6 +5987,28 @@ qemuDomainDefValidateMemoryHotplug(const virDomainDef *def, for (i = 0; i < def->nmems; i++) { hotplugMemory += def->mems[i]->size; + switch ((virDomainMemoryModel) def->mems[i]->model) { + case VIR_DOMAIN_MEMORY_MODEL_DIMM: + if (!virQEMUCapsGet(qemuCaps, QEMU_CAPS_DEVICE_PC_DIMM)) { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", + _("memory hotplug isn't supported by this QEMU binary")); + return -1; + } + break; + + case VIR_DOMAIN_MEMORY_MODEL_NVDIMM: + if (!virQEMUCapsGet(qemuCaps, QEMU_CAPS_DEVICE_NVDIMM)) { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", + _("nvdimm isn't supported by this QEMU binary")); + return -1; + } + break; + + case VIR_DOMAIN_MEMORY_MODEL_NONE: + case VIR_DOMAIN_MEMORY_MODEL_LAST: + break; + } + /* already existing devices don't need to be checked on hotplug */ if (!mem && qemuDomainDefValidateMemoryHotplugDevice(def->mems[i], def) < 0) diff --git a/src/qemu/qemu_hotplug.c b/src/qemu/qemu_hotplug.c index c7b8074d6..51b87804d 100644 --- a/src/qemu/qemu_hotplug.c +++ b/src/qemu/qemu_hotplug.c @@ -2218,7 +2218,8 @@ qemuDomainAttachMemory(virQEMUDriverPtr driver, if (qemuBuildMemoryBackendStr(&props, &backendType, cfg, priv->qemuCaps, vm->def, mem->targetNode, mem->size, - mem->pagesize, mem->sourceNodes, NULL, true) < 0) + mem->pagesize, mem->sourceNodes, NULL, + mem->path, true) < 0) goto cleanup; if (virDomainMemoryInsert(vm->def, mem) < 0) { diff --git a/tests/qemuxml2argvdata/qemuxml2argv-memory-hotplug-nvdimm.args b/tests/qemuxml2argvdata/qemuxml2argv-memory-hotplug-nvdimm.args new file mode 100644 index 000000000..907bcbeda --- /dev/null +++ b/tests/qemuxml2argvdata/qemuxml2argv-memory-hotplug-nvdimm.args @@ -0,0 +1,26 @@ +LC_ALL=C \ +PATH=/bin \ +HOME=/home/test \ +USER=test \ +LOGNAME=test \ +QEMU_AUDIO_DRV=none \ +/usr/bin/qemu \ +-name QEMUGuest1 \ +-S \ +-machine pc,accel=tcg,nvdimm=on \ +-m size=1048576k,slots=16,maxmem=1099511627776k \ +-smp 2,sockets=2,cores=1,threads=1 \ +-numa node,nodeid=0,cpus=0-1,mem=1024 \ +-object memory-backend-file,id=memnvdimm0,prealloc=yes,mem-path=/tmp/nvdimm,\ +size=536870912 \ +-device nvdimm,node=0,memdev=memnvdimm0,id=nvdimm0,slot=0 \ +-uuid c7a5fdbd-edaf-9455-926a-d65c16db1809 \ +-nographic \ +-nodefaults \ +-monitor unix:/tmp/lib/domain--1-QEMUGuest1/monitor.sock,server,nowait \ +-no-acpi \ +-boot c \ +-usb \ +-drive file=/dev/HostVG/QEMUGuest1,format=raw,if=none,id=drive-ide0-0-0 \ +-device ide-drive,bus=ide.0,unit=0,drive=drive-ide0-0-0,id=ide0-0-0 \ +-device virtio-balloon-pci,id=balloon0,bus=pci.0,addr=0x3 diff --git a/tests/qemuxml2argvtest.c b/tests/qemuxml2argvtest.c index ad9ce8e2d..05a51a389 100644 --- a/tests/qemuxml2argvtest.c +++ b/tests/qemuxml2argvtest.c @@ -2325,7 +2325,7 @@ mymain(void) DO_TEST_FAILURE("memory-align-fail", NONE); DO_TEST_FAILURE("memory-hotplug-nonuma", QEMU_CAPS_DEVICE_PC_DIMM); - DO_TEST_FAILURE("memory-hotplug", NONE); + DO_TEST("memory-hotplug", NONE); DO_TEST("memory-hotplug", QEMU_CAPS_DEVICE_PC_DIMM, QEMU_CAPS_NUMA); DO_TEST("memory-hotplug-dimm", QEMU_CAPS_DEVICE_PC_DIMM, QEMU_CAPS_NUMA, QEMU_CAPS_OBJECT_MEMORY_RAM, QEMU_CAPS_OBJECT_MEMORY_FILE); @@ -2333,6 +2333,8 @@ mymain(void) QEMU_CAPS_OBJECT_MEMORY_RAM, QEMU_CAPS_OBJECT_MEMORY_FILE); DO_TEST("memory-hotplug-ppc64-nonuma", QEMU_CAPS_KVM, QEMU_CAPS_DEVICE_PC_DIMM, QEMU_CAPS_NUMA, QEMU_CAPS_OBJECT_MEMORY_RAM, QEMU_CAPS_OBJECT_MEMORY_FILE); + DO_TEST("memory-hotplug-nvdimm", QEMU_CAPS_MACHINE_OPT, QEMU_CAPS_DEVICE_NVDIMM, + QEMU_CAPS_NUMA, QEMU_CAPS_OBJECT_MEMORY_RAM, QEMU_CAPS_OBJECT_MEMORY_FILE); DO_TEST("machine-aeskeywrap-on-caps", QEMU_CAPS_MACHINE_OPT, QEMU_CAPS_AES_KEY_WRAP, -- 2.11.0

On 02/27/2017 08:19 AM, Michal Privoznik wrote:
So, majority of the code is just ready as-is. Well, with one slight change: differentiate between dimm and nvdimm in places like device alias generation, generating the command line and so on.
Speaking of the command line, we also need to append 'nvdimm=on' to the '-machine' argument so that the nvdimm feature is advertised in the ACPI tables properly.
Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/qemu/qemu_alias.c | 10 ++- src/qemu/qemu_command.c | 91 +++++++++++++++------- src/qemu/qemu_command.h | 1 + src/qemu/qemu_domain.c | 34 +++++--- src/qemu/qemu_hotplug.c | 3 +- .../qemuxml2argv-memory-hotplug-nvdimm.args | 26 +++++++ tests/qemuxml2argvtest.c | 4 +- 7 files changed, 128 insertions(+), 41 deletions(-) create mode 100644 tests/qemuxml2argvdata/qemuxml2argv-memory-hotplug-nvdimm.args
diff --git a/src/qemu/qemu_alias.c b/src/qemu/qemu_alias.c index 4cccf231e..05e1293ef 100644 --- a/src/qemu/qemu_alias.c +++ b/src/qemu/qemu_alias.c @@ -352,17 +352,23 @@ qemuAssignDeviceMemoryAlias(virDomainDefPtr def, size_t i; int maxidx = 0; int idx; + const char *prefix; + + if (mem->model == VIR_DOMAIN_MEMORY_MODEL_DIMM) + prefix = "dimm"; + else + prefix = "nvdimm";
if (oldAlias) { for (i = 0; i < def->nmems; i++) { - if ((idx = qemuDomainDeviceAliasIndex(&def->mems[i]->info, "dimm")) >= maxidx) + if ((idx = qemuDomainDeviceAliasIndex(&def->mems[i]->info, prefix)) >= maxidx) maxidx = idx + 1; } } else { maxidx = mem->info.addr.dimm.slot; }
- if (virAsprintf(&mem->info.alias, "dimm%d", maxidx) < 0) + if (virAsprintf(&mem->info.alias, "%s%d", prefix, maxidx) < 0) return -1;
return 0; diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c index f628a9929..46a6ca9f0 100644 --- a/src/qemu/qemu_command.c +++ b/src/qemu/qemu_command.c @@ -3089,6 +3089,7 @@ qemuBuildControllerDevCommandLine(virCommandPtr cmd, * @userNodeset: user requested map of host nodes to alloc the memory on, NULL * for default * @autoNodeset: fallback nodeset in case of automatic NUMA placement + * @memPathReq: request memory-backend-file with specific mem-path * @force: forcibly use one of the backends * * Creates a configuration object that represents memory backend of given guest @@ -3103,6 +3104,8 @@ qemuBuildControllerDevCommandLine(virCommandPtr cmd, * Then, if one of the two memory-backend-* should be used, the @qemuCaps is * consulted to check if qemu does support it. * + * If @memPathReq is non-NULL, memory-backend-file is used with passed path. + *
I think if you consider my suggestion in patch 1, then this parameter becomes unnecessary and all that has to happen is the one place where the 'mem' is built on the stack would need to decide to add the field or not.
* Returns: 0 on success, * 1 on success and if there's no need to use memory-backend-* * -1 on error. @@ -3118,6 +3121,7 @@ qemuBuildMemoryBackendStr(virJSONValuePtr *backendProps, unsigned long long pagesize, virBitmapPtr userNodeset, virBitmapPtr autoNodeset, + const char *memPathReq, bool force) { virDomainHugePagePtr master_hugepage = NULL; @@ -3126,7 +3130,8 @@ qemuBuildMemoryBackendStr(virJSONValuePtr *backendProps, const long system_page_size = virGetSystemPageSizeKB(); virDomainMemoryAccess memAccess = VIR_DOMAIN_MEMORY_ACCESS_DEFAULT; size_t i; - char *mem_path = NULL; + char *memPathActual = NULL; + bool prealloc = false; virBitmapPtr nodemask = NULL; int ret = -1; virJSONValuePtr props = NULL; @@ -3206,27 +3211,36 @@ qemuBuildMemoryBackendStr(virJSONValuePtr *backendProps, if (!(props = virJSONValueNewObject())) return -1;
- if (pagesize || def->mem.source == VIR_DOMAIN_MEMORY_SOURCE_FILE) { + if (pagesize || memPathReq || + def->mem.source == VIR_DOMAIN_MEMORY_SOURCE_FILE) { *backendType = "memory-backend-file";
- if (def->mem.source == VIR_DOMAIN_MEMORY_SOURCE_FILE) { + if (memPathReq) { + if (VIR_STRDUP(memPathActual, memPathReq) < 0) + goto cleanup; + prealloc = true; + } else if (def->mem.source == VIR_DOMAIN_MEMORY_SOURCE_FILE) { /* we can have both pagesize and mem source, then check mem source first */
Seeing as you're changing code in here anyway - could the comment span 2 lines so as to not exceed 80 chars?
+ if (VIR_STRDUP(memPathActual, cfg->memoryBackingDir) < 0) + goto cleanup; force = true; - if (virJSONValueObjectAdd(props, - "s:mem-path", cfg->memoryBackingDir, - NULL) < 0) - goto cleanup; } else { - if (qemuGetDomainHupageMemPath(def, cfg, pagesize, &mem_path) < 0) - goto cleanup; - - if (virJSONValueObjectAdd(props, - "b:prealloc", true, - "s:mem-path", mem_path, - NULL) < 0) + if (qemuGetDomainHupageMemPath(def, cfg, pagesize, &memPathActual) < 0) goto cleanup; + prealloc = true; }
+ if (prealloc && + virJSONValueObjectAdd(props, + "b:prealloc", true,
Assuming the qemu default would be if not present, then false... If you went with "B:prealloc", prealloc, then virJSONValueObjectAddVArgs only adds "prealloc" if prealloc is true, thus the code here could be combined...
+ NULL) < 0) + goto cleanup; + + if (virJSONValueObjectAdd(props, + "s:mem-path", memPathActual, + NULL) < 0) + goto cleanup; + switch (memAccess) { case VIR_DOMAIN_MEMORY_ACCESS_SHARED: if (virJSONValueObjectAdd(props, "b:share", true, NULL) < 0) @@ -3281,7 +3295,8 @@ qemuBuildMemoryBackendStr(virJSONValuePtr *backendProps, }
/* If none of the following is requested... */ - if (!needHugepage && !userNodeset && !memAccess && !nodeSpecified && !force) { + if (!needHugepage && !userNodeset && + !memAccess && !nodeSpecified && !force && !memPathReq) {
This is the kind of "logic" that could be replaced if 'force' (or some more suitably named replacement) was made to be an output as well...
/* report back that using the new backend is not necessary * to achieve the desired configuration */ ret = 1; @@ -3309,8 +3324,7 @@ qemuBuildMemoryBackendStr(virJSONValuePtr *backendProps,
cleanup: virJSONValueFree(props); - VIR_FREE(mem_path); - + VIR_FREE(memPathActual); return ret; }
@@ -3338,7 +3352,7 @@ qemuBuildMemoryCellBackendStr(virDomainDefPtr def,
if ((rc = qemuBuildMemoryBackendStr(&props, &backendType, cfg, qemuCaps, def, cell, memsize, 0, NULL, - auto_nodeset, false)) < 0) + auto_nodeset, NULL, false)) < 0) goto cleanup;
if (!(*backendStr = virQEMUBuildObjectCommandlineFromJSON(backendType, @@ -3379,7 +3393,8 @@ qemuBuildMemoryDimmBackendStr(virDomainMemoryDefPtr mem,
if (qemuBuildMemoryBackendStr(&props, &backendType, cfg, qemuCaps, def, mem->targetNode, mem->size, mem->pagesize, - mem->sourceNodes, auto_nodeset, true) < 0) + mem->sourceNodes, auto_nodeset, mem->path, + true) < 0) goto cleanup;
ret = virQEMUBuildObjectCommandlineFromJSON(backendType, alias, props); @@ -3396,6 +3411,7 @@ char * qemuBuildMemoryDeviceStr(virDomainMemoryDefPtr mem) { virBuffer buf = VIR_BUFFER_INITIALIZER; + const char *device;
if (!mem->info.alias) { virReportError(VIR_ERR_INTERNAL_ERROR, "%s", @@ -3404,8 +3420,15 @@ qemuBuildMemoryDeviceStr(virDomainMemoryDefPtr mem) }
switch ((virDomainMemoryModel) mem->model) { + case VIR_DOMAIN_MEMORY_MODEL_NVDIMM: case VIR_DOMAIN_MEMORY_MODEL_DIMM: - virBufferAddLit(&buf, "pc-dimm,"); + + if (mem->model == VIR_DOMAIN_MEMORY_MODEL_DIMM) + device = "pc-dimm"; + else + device = "nvdimm"; + + virBufferAsprintf(&buf, "%s,", device);
if (mem->targetNode >= 0) virBufferAsprintf(&buf, "node=%d,", mem->targetNode); @@ -3421,12 +3444,6 @@ qemuBuildMemoryDeviceStr(virDomainMemoryDefPtr mem)
break;
- case VIR_DOMAIN_MEMORY_MODEL_NVDIMM: - virReportError(VIR_ERR_NO_SUPPORT, "%s", - _("nvdimm not supported yet")); - return NULL; - break; - case VIR_DOMAIN_MEMORY_MODEL_NONE: case VIR_DOMAIN_MEMORY_MODEL_LAST: break; @@ -6976,6 +6993,7 @@ qemuBuildMachineCommandLine(virCommandPtr cmd, { virBuffer buf = VIR_BUFFER_INITIALIZER; bool obsoleteAccel = false; + size_t i; int ret = -1;
/* This should *never* be NULL, since we always provide @@ -7012,6 +7030,15 @@ qemuBuildMachineCommandLine(virCommandPtr cmd, "with this QEMU binary")); return -1; } + + for (i = 0; i < def->nmems; i++) { + if (def->mems[i]->model == VIR_DOMAIN_MEMORY_MODEL_NVDIMM) { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", + _("nvdimm not is not available " + "with this QEMU binary")); + return -1; + } + } } else { virTristateSwitch vmport = def->features[VIR_DOMAIN_FEATURE_VMPORT]; virTristateSwitch smm = def->features[VIR_DOMAIN_FEATURE_SMM]; @@ -7132,6 +7159,18 @@ qemuBuildMachineCommandLine(virCommandPtr cmd, } }
+ for (i = 0; i < def->nmems; i++) { + if (def->mems[i]->model == VIR_DOMAIN_MEMORY_MODEL_NVDIMM) { + if (!virQEMUCapsGet(qemuCaps, QEMU_CAPS_DEVICE_NVDIMM)) { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", + _("nvdimm isn't supported by this QEMU binary")); + goto cleanup; + } + virBufferAddLit(&buf, ",nvdimm=on"); + break; + } + } + virCommandAddArgBuffer(cmd, &buf); }
diff --git a/src/qemu/qemu_command.h b/src/qemu/qemu_command.h index efdad77f3..e23930255 100644 --- a/src/qemu/qemu_command.h +++ b/src/qemu/qemu_command.h @@ -134,6 +134,7 @@ int qemuBuildMemoryBackendStr(virJSONValuePtr *backendProps, unsigned long long pagesize, virBitmapPtr userNodeset, virBitmapPtr autoNodeset, + const char *memPath, bool force);
char *qemuBuildMemoryDeviceStr(virDomainMemoryDefPtr mem); diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c index 5ec610564..f585e6f25 100644 --- a/src/qemu/qemu_domain.c +++ b/src/qemu/qemu_domain.c @@ -5878,6 +5878,7 @@ qemuDomainDefValidateMemoryHotplugDevice(const virDomainMemoryDef *mem, { switch ((virDomainMemoryModel) mem->model) { case VIR_DOMAIN_MEMORY_MODEL_DIMM: + case VIR_DOMAIN_MEMORY_MODEL_NVDIMM: if (mem->info.type != VIR_DOMAIN_DEVICE_ADDRESS_TYPE_DIMM && mem->info.type != VIR_DOMAIN_DEVICE_ADDRESS_TYPE_NONE) { virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", @@ -5910,11 +5911,6 @@ qemuDomainDefValidateMemoryHotplugDevice(const virDomainMemoryDef *mem, } break;
- case VIR_DOMAIN_MEMORY_MODEL_NVDIMM: - virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", - _("nvdimm hotplug not supported yet")); - return -1; - case VIR_DOMAIN_MEMORY_MODEL_NONE: case VIR_DOMAIN_MEMORY_MODEL_LAST: return -1; @@ -5968,12 +5964,6 @@ qemuDomainDefValidateMemoryHotplug(const virDomainDef *def, return 0; }
- if (!virQEMUCapsGet(qemuCaps, QEMU_CAPS_DEVICE_PC_DIMM)) { - virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", - _("memory hotplug isn't supported by this QEMU binary")); - return -1; - } - if (!ARCH_IS_PPC64(def->os.arch)) { /* due to guest support, qemu would silently enable NUMA with one node * once the memory hotplug backend is enabled. To avoid possible @@ -5997,6 +5987,28 @@ qemuDomainDefValidateMemoryHotplug(const virDomainDef *def, for (i = 0; i < def->nmems; i++) { hotplugMemory += def->mems[i]->size;
+ switch ((virDomainMemoryModel) def->mems[i]->model) { + case VIR_DOMAIN_MEMORY_MODEL_DIMM: + if (!virQEMUCapsGet(qemuCaps, QEMU_CAPS_DEVICE_PC_DIMM)) { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", + _("memory hotplug isn't supported by this QEMU binary")); + return -1; + } + break; + + case VIR_DOMAIN_MEMORY_MODEL_NVDIMM: + if (!virQEMUCapsGet(qemuCaps, QEMU_CAPS_DEVICE_NVDIMM)) { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", + _("nvdimm isn't supported by this QEMU binary")); + return -1; + } + break; + + case VIR_DOMAIN_MEMORY_MODEL_NONE: + case VIR_DOMAIN_MEMORY_MODEL_LAST: + break; + } +
This could be inefficient if there are "many" [NV]DIMM's as it's making a check for each defined rather than some signifying we already checked one type (and the other obviously). Beyond that - this is ACK'able with a couple of adjustments. John
/* already existing devices don't need to be checked on hotplug */ if (!mem && qemuDomainDefValidateMemoryHotplugDevice(def->mems[i], def) < 0) diff --git a/src/qemu/qemu_hotplug.c b/src/qemu/qemu_hotplug.c index c7b8074d6..51b87804d 100644 --- a/src/qemu/qemu_hotplug.c +++ b/src/qemu/qemu_hotplug.c @@ -2218,7 +2218,8 @@ qemuDomainAttachMemory(virQEMUDriverPtr driver,
if (qemuBuildMemoryBackendStr(&props, &backendType, cfg, priv->qemuCaps, vm->def, mem->targetNode, mem->size, - mem->pagesize, mem->sourceNodes, NULL, true) < 0) + mem->pagesize, mem->sourceNodes, NULL, + mem->path, true) < 0) goto cleanup;
if (virDomainMemoryInsert(vm->def, mem) < 0) { diff --git a/tests/qemuxml2argvdata/qemuxml2argv-memory-hotplug-nvdimm.args b/tests/qemuxml2argvdata/qemuxml2argv-memory-hotplug-nvdimm.args new file mode 100644 index 000000000..907bcbeda --- /dev/null +++ b/tests/qemuxml2argvdata/qemuxml2argv-memory-hotplug-nvdimm.args @@ -0,0 +1,26 @@ +LC_ALL=C \ +PATH=/bin \ +HOME=/home/test \ +USER=test \ +LOGNAME=test \ +QEMU_AUDIO_DRV=none \ +/usr/bin/qemu \ +-name QEMUGuest1 \ +-S \ +-machine pc,accel=tcg,nvdimm=on \ +-m size=1048576k,slots=16,maxmem=1099511627776k \ +-smp 2,sockets=2,cores=1,threads=1 \ +-numa node,nodeid=0,cpus=0-1,mem=1024 \ +-object memory-backend-file,id=memnvdimm0,prealloc=yes,mem-path=/tmp/nvdimm,\ +size=536870912 \ +-device nvdimm,node=0,memdev=memnvdimm0,id=nvdimm0,slot=0 \ +-uuid c7a5fdbd-edaf-9455-926a-d65c16db1809 \ +-nographic \ +-nodefaults \ +-monitor unix:/tmp/lib/domain--1-QEMUGuest1/monitor.sock,server,nowait \ +-no-acpi \ +-boot c \ +-usb \ +-drive file=/dev/HostVG/QEMUGuest1,format=raw,if=none,id=drive-ide0-0-0 \ +-device ide-drive,bus=ide.0,unit=0,drive=drive-ide0-0-0,id=ide0-0-0 \ +-device virtio-balloon-pci,id=balloon0,bus=pci.0,addr=0x3 diff --git a/tests/qemuxml2argvtest.c b/tests/qemuxml2argvtest.c index ad9ce8e2d..05a51a389 100644 --- a/tests/qemuxml2argvtest.c +++ b/tests/qemuxml2argvtest.c @@ -2325,7 +2325,7 @@ mymain(void)
DO_TEST_FAILURE("memory-align-fail", NONE); DO_TEST_FAILURE("memory-hotplug-nonuma", QEMU_CAPS_DEVICE_PC_DIMM); - DO_TEST_FAILURE("memory-hotplug", NONE); + DO_TEST("memory-hotplug", NONE); DO_TEST("memory-hotplug", QEMU_CAPS_DEVICE_PC_DIMM, QEMU_CAPS_NUMA); DO_TEST("memory-hotplug-dimm", QEMU_CAPS_DEVICE_PC_DIMM, QEMU_CAPS_NUMA, QEMU_CAPS_OBJECT_MEMORY_RAM, QEMU_CAPS_OBJECT_MEMORY_FILE); @@ -2333,6 +2333,8 @@ mymain(void) QEMU_CAPS_OBJECT_MEMORY_RAM, QEMU_CAPS_OBJECT_MEMORY_FILE); DO_TEST("memory-hotplug-ppc64-nonuma", QEMU_CAPS_KVM, QEMU_CAPS_DEVICE_PC_DIMM, QEMU_CAPS_NUMA, QEMU_CAPS_OBJECT_MEMORY_RAM, QEMU_CAPS_OBJECT_MEMORY_FILE); + DO_TEST("memory-hotplug-nvdimm", QEMU_CAPS_MACHINE_OPT, QEMU_CAPS_DEVICE_NVDIMM, + QEMU_CAPS_NUMA, QEMU_CAPS_OBJECT_MEMORY_RAM, QEMU_CAPS_OBJECT_MEMORY_FILE);
DO_TEST("machine-aeskeywrap-on-caps", QEMU_CAPS_MACHINE_OPT, QEMU_CAPS_AES_KEY_WRAP,

Now that NVDIMM has found its way into libvirt, users might want to fine tune some settings for each module separately. One such setting is 'share=on|off' for the memory-backend-file object. This setting - just like its name suggest already - enables sharing the nvdimm module with other applications. Under the hood it controls whether qemu mmaps() the file as MAP_PRIVATE or MAP_SHARED. Yet again, we have such config knob in domain XML, but it's just an attribute to numa <cell/>. This does not give fine enough tuning on per-memdevice basis so we need to have the attribute for each device too. Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- docs/formatdomain.html.in | 15 +++++- docs/schemas/domaincommon.rng | 8 ++++ src/conf/domain_conf.c | 15 +++++- src/conf/domain_conf.h | 2 + .../qemuxml2argv-memory-hotplug-nvdimm-access.xml | 56 ++++++++++++++++++++++ ...qemuxml2xmlout-memory-hotplug-nvdimm-access.xml | 1 + tests/qemuxml2xmltest.c | 1 + 7 files changed, 95 insertions(+), 3 deletions(-) create mode 100644 tests/qemuxml2argvdata/qemuxml2argv-memory-hotplug-nvdimm-access.xml create mode 120000 tests/qemuxml2xmloutdata/qemuxml2xmlout-memory-hotplug-nvdimm-access.xml diff --git a/docs/formatdomain.html.in b/docs/formatdomain.html.in index b76905cdc..00c0df2ce 100644 --- a/docs/formatdomain.html.in +++ b/docs/formatdomain.html.in @@ -1406,7 +1406,7 @@ <span class='since'>Since 1.2.9</span> the optional attribute <code>memAccess</code> can control whether the memory is to be mapped as "shared" or "private". This is valid only for - hugepages-backed memory. + hugepages-backed memory and nvdimm modules. </p> <p> @@ -7015,7 +7015,7 @@ qemu-kvm -net nic,model=? /dev/null <pre> ... <devices> - <memory model='dimm'> + <memory model='dimm' access='private'> <target> <size unit='KiB'>524287</size> <node>0</node> @@ -7054,6 +7054,17 @@ qemu-kvm -net nic,model=? /dev/null </p> </dd> + <dt><code>access</code></dt> + <dd> + <p> + Then there is optional attribute <code>access</code> + (<span class="since">Since 3.1.0</span>) that allows + uses to fine tune mapping of the memory on per module + basis. Values are the same as for numa <cell/>: + <code>shared</code> and <code>private</code>. + </p> + </dd> + <dt><code>source</code></dt> <dd> <p> diff --git a/docs/schemas/domaincommon.rng b/docs/schemas/domaincommon.rng index fafd3e982..78195aae9 100644 --- a/docs/schemas/domaincommon.rng +++ b/docs/schemas/domaincommon.rng @@ -4740,6 +4740,14 @@ <value>nvdimm</value> </choice> </attribute> + <optional> + <attribute name="access"> + <choice> + <value>shared</value> + <value>private</value> + </choice> + </attribute> + </optional> <interleave> <optional> <ref name="memorydev-source"/> diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c index 4ffca7dc8..a31114cc7 100644 --- a/src/conf/domain_conf.c +++ b/src/conf/domain_conf.c @@ -13860,6 +13860,15 @@ virDomainMemoryDefParseXML(xmlNodePtr memdevNode, } VIR_FREE(tmp); + tmp = virXMLPropString(memdevNode, "access"); + if (tmp && + (def->access = virDomainMemoryAccessTypeFromString(tmp)) <= 0) { + virReportError(VIR_ERR_XML_ERROR, + _("invalid access mode '%s'"), tmp); + goto error; + } + VIR_FREE(tmp); + /* source */ if ((node = virXPathNode("./source", ctxt)) && virDomainMemorySourceDefParseXML(node, ctxt, def) < 0) @@ -22666,7 +22675,11 @@ virDomainMemoryDefFormat(virBufferPtr buf, { const char *model = virDomainMemoryModelTypeToString(def->model); - virBufferAsprintf(buf, "<memory model='%s'>\n", model); + virBufferAsprintf(buf, "<memory model='%s'", model); + if (def->access) + virBufferAsprintf(buf, " access='%s'", + virDomainMemoryAccessTypeToString(def->access)); + virBufferAddLit(buf, ">\n"); virBufferAdjustIndent(buf, 2); if (virDomainMemorySourceDefFormat(buf, def) < 0) diff --git a/src/conf/domain_conf.h b/src/conf/domain_conf.h index dc949d3c9..87516ca69 100644 --- a/src/conf/domain_conf.h +++ b/src/conf/domain_conf.h @@ -2002,6 +2002,8 @@ typedef enum { } virDomainMemoryModel; struct _virDomainMemoryDef { + virDomainMemoryAccess access; + /* source */ virBitmapPtr sourceNodes; unsigned long long pagesize; /* kibibytes */ diff --git a/tests/qemuxml2argvdata/qemuxml2argv-memory-hotplug-nvdimm-access.xml b/tests/qemuxml2argvdata/qemuxml2argv-memory-hotplug-nvdimm-access.xml new file mode 100644 index 000000000..700e961a6 --- /dev/null +++ b/tests/qemuxml2argvdata/qemuxml2argv-memory-hotplug-nvdimm-access.xml @@ -0,0 +1,56 @@ +<domain type='qemu'> + <name>QEMUGuest1</name> + <uuid>c7a5fdbd-edaf-9455-926a-d65c16db1809</uuid> + <maxMemory slots='16' unit='KiB'>1099511627776</maxMemory> + <memory unit='KiB'>1267710</memory> + <currentMemory unit='KiB'>1267710</currentMemory> + <vcpu placement='static' cpuset='0-1'>2</vcpu> + <os> + <type arch='i686' machine='pc'>hvm</type> + <boot dev='hd'/> + </os> + <idmap> + <uid start='0' target='1000' count='10'/> + <gid start='0' target='1000' count='10'/> + </idmap> + <cpu> + <topology sockets='2' cores='1' threads='1'/> + <numa> + <cell id='0' cpus='0-1' memory='219136' unit='KiB'/> + </numa> + </cpu> + <clock offset='utc'/> + <on_poweroff>destroy</on_poweroff> + <on_reboot>restart</on_reboot> + <on_crash>destroy</on_crash> + <devices> + <emulator>/usr/bin/qemu</emulator> + <disk type='block' device='disk'> + <source dev='/dev/HostVG/QEMUGuest1'/> + <target dev='hda' bus='ide'/> + <address type='drive' controller='0' bus='0' target='0' unit='0'/> + </disk> + <controller type='ide' index='0'> + <address type='pci' domain='0x0000' bus='0x00' slot='0x01' function='0x1'/> + </controller> + <controller type='usb' index='0'> + <address type='pci' domain='0x0000' bus='0x00' slot='0x01' function='0x2'/> + </controller> + <controller type='pci' index='0' model='pci-root'/> + <input type='mouse' bus='ps2'/> + <input type='keyboard' bus='ps2'/> + <memballoon model='virtio'> + <address type='pci' domain='0x0000' bus='0x00' slot='0x03' function='0x0'/> + </memballoon> + <memory model='nvdimm' access='private'> + <source> + <path>/tmp/nvdimm</path> + </source> + <target> + <size unit='KiB'>523264</size> + <node>0</node> + </target> + <address type='dimm' slot='0'/> + </memory> + </devices> +</domain> diff --git a/tests/qemuxml2xmloutdata/qemuxml2xmlout-memory-hotplug-nvdimm-access.xml b/tests/qemuxml2xmloutdata/qemuxml2xmlout-memory-hotplug-nvdimm-access.xml new file mode 120000 index 000000000..4b0496ad5 --- /dev/null +++ b/tests/qemuxml2xmloutdata/qemuxml2xmlout-memory-hotplug-nvdimm-access.xml @@ -0,0 +1 @@ +/home/zippy/work/libvirt/libvirt.git/tests/qemuxml2argvdata/qemuxml2argv-memory-hotplug-nvdimm-access.xml \ No newline at end of file diff --git a/tests/qemuxml2xmltest.c b/tests/qemuxml2xmltest.c index e1c341dd5..ef49a79ca 100644 --- a/tests/qemuxml2xmltest.c +++ b/tests/qemuxml2xmltest.c @@ -1079,6 +1079,7 @@ mymain(void) DO_TEST("memory-hotplug-nonuma", NONE); DO_TEST("memory-hotplug-dimm", NONE); DO_TEST("memory-hotplug-nvdimm", NONE); + DO_TEST("memory-hotplug-nvdimm-access", NONE); DO_TEST("net-udp", NONE); DO_TEST("video-virtio-gpu-device", NONE); -- 2.11.0

On 02/27/2017 08:19 AM, Michal Privoznik wrote:
Now that NVDIMM has found its way into libvirt, users might want to fine tune some settings for each module separately. One such setting is 'share=on|off' for the memory-backend-file object. This setting - just like its name suggest already - enables sharing the nvdimm module with other applications. Under the hood it controls whether qemu mmaps() the file as MAP_PRIVATE or MAP_SHARED.
Yet again, we have such config knob in domain XML, but it's just an attribute to numa <cell/>. This does not give fine enough tuning on per-memdevice basis so we need to have the attribute for each device too.
Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- docs/formatdomain.html.in | 15 +++++- docs/schemas/domaincommon.rng | 8 ++++ src/conf/domain_conf.c | 15 +++++- src/conf/domain_conf.h | 2 + .../qemuxml2argv-memory-hotplug-nvdimm-access.xml | 56 ++++++++++++++++++++++ ...qemuxml2xmlout-memory-hotplug-nvdimm-access.xml | 1 + tests/qemuxml2xmltest.c | 1 + 7 files changed, 95 insertions(+), 3 deletions(-) create mode 100644 tests/qemuxml2argvdata/qemuxml2argv-memory-hotplug-nvdimm-access.xml create mode 120000 tests/qemuxml2xmloutdata/qemuxml2xmlout-memory-hotplug-nvdimm-access.xml
diff --git a/docs/formatdomain.html.in b/docs/formatdomain.html.in index b76905cdc..00c0df2ce 100644 --- a/docs/formatdomain.html.in +++ b/docs/formatdomain.html.in @@ -1406,7 +1406,7 @@ <span class='since'>Since 1.2.9</span> the optional attribute <code>memAccess</code> can control whether the memory is to be mapped as "shared" or "private". This is valid only for - hugepages-backed memory. + hugepages-backed memory and nvdimm modules. </p>
<p> @@ -7015,7 +7015,7 @@ qemu-kvm -net nic,model=? /dev/null <pre> ... <devices> - <memory model='dimm'> + <memory model='dimm' access='private'>
Hmmm... this type of change almost makes it seem as though access will be required or written out always now.
<target> <size unit='KiB'>524287</size> <node>0</node> @@ -7054,6 +7054,17 @@ qemu-kvm -net nic,model=? /dev/null </p> </dd>
+ <dt><code>access</code></dt> + <dd> + <p> + Then there is optional attribute <code>access</code>
s/Then there is optional/An optional
+ (<span class="since">Since 3.1.0</span>) that allows
It'll be "since 3.2.0" at the earliest (don't capitalize Since in the middle of a sentence)
+ uses to fine tune mapping of the memory on per module
s/allows uses to/provides the capability to/
+ basis. Values are the same as for numa <cell/>: + <code>shared</code> and <code>private</code>.
It's perhaps a nit, but I guess I just read the docs literally rather than interpretively... The "access mode='shared|private'" shows up in the "Memory Backing" section even though there's a NUMA Node Tuning section later. So, I would change "for numa <cell>" to include "Memory Backing" and a link to that, e.g. <a href="#elementsMemoryBacking">Memory Backing</a> within the text
+ </p> + </dd> + <dt><code>source</code></dt> <dd> <p> diff --git a/docs/schemas/domaincommon.rng b/docs/schemas/domaincommon.rng index fafd3e982..78195aae9 100644 --- a/docs/schemas/domaincommon.rng +++ b/docs/schemas/domaincommon.rng @@ -4740,6 +4740,14 @@ <value>nvdimm</value> </choice> </attribute> + <optional> + <attribute name="access"> + <choice> + <value>shared</value> + <value>private</value> + </choice> + </attribute> + </optional> <interleave> <optional> <ref name="memorydev-source"/> diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c index 4ffca7dc8..a31114cc7 100644 --- a/src/conf/domain_conf.c +++ b/src/conf/domain_conf.c @@ -13860,6 +13860,15 @@ virDomainMemoryDefParseXML(xmlNodePtr memdevNode, } VIR_FREE(tmp);
+ tmp = virXMLPropString(memdevNode, "access"); + if (tmp && + (def->access = virDomainMemoryAccessTypeFromString(tmp)) <= 0) { + virReportError(VIR_ERR_XML_ERROR, + _("invalid access mode '%s'"), tmp); + goto error; + } + VIR_FREE(tmp); + /* source */ if ((node = virXPathNode("./source", ctxt)) && virDomainMemorySourceDefParseXML(node, ctxt, def) < 0) @@ -22666,7 +22675,11 @@ virDomainMemoryDefFormat(virBufferPtr buf, { const char *model = virDomainMemoryModelTypeToString(def->model);
- virBufferAsprintf(buf, "<memory model='%s'>\n", model); + virBufferAsprintf(buf, "<memory model='%s'", model); + if (def->access) + virBufferAsprintf(buf, " access='%s'", + virDomainMemoryAccessTypeToString(def->access)); + virBufferAddLit(buf, ">\n"); virBufferAdjustIndent(buf, 2);
if (virDomainMemorySourceDefFormat(buf, def) < 0)
Me thinks this too would need some amount of ABI checks wouldn't it? That is virDomainMemoryDefCheckABIStability for each mem? With that it would seem this is ACK-able John
diff --git a/src/conf/domain_conf.h b/src/conf/domain_conf.h index dc949d3c9..87516ca69 100644 --- a/src/conf/domain_conf.h +++ b/src/conf/domain_conf.h @@ -2002,6 +2002,8 @@ typedef enum { } virDomainMemoryModel;
struct _virDomainMemoryDef { + virDomainMemoryAccess access; + /* source */ virBitmapPtr sourceNodes; unsigned long long pagesize; /* kibibytes */ diff --git a/tests/qemuxml2argvdata/qemuxml2argv-memory-hotplug-nvdimm-access.xml b/tests/qemuxml2argvdata/qemuxml2argv-memory-hotplug-nvdimm-access.xml new file mode 100644 index 000000000..700e961a6 --- /dev/null +++ b/tests/qemuxml2argvdata/qemuxml2argv-memory-hotplug-nvdimm-access.xml @@ -0,0 +1,56 @@ +<domain type='qemu'> + <name>QEMUGuest1</name> + <uuid>c7a5fdbd-edaf-9455-926a-d65c16db1809</uuid> + <maxMemory slots='16' unit='KiB'>1099511627776</maxMemory> + <memory unit='KiB'>1267710</memory> + <currentMemory unit='KiB'>1267710</currentMemory> + <vcpu placement='static' cpuset='0-1'>2</vcpu> + <os> + <type arch='i686' machine='pc'>hvm</type> + <boot dev='hd'/> + </os> + <idmap> + <uid start='0' target='1000' count='10'/> + <gid start='0' target='1000' count='10'/> + </idmap> + <cpu> + <topology sockets='2' cores='1' threads='1'/> + <numa> + <cell id='0' cpus='0-1' memory='219136' unit='KiB'/> + </numa> + </cpu> + <clock offset='utc'/> + <on_poweroff>destroy</on_poweroff> + <on_reboot>restart</on_reboot> + <on_crash>destroy</on_crash> + <devices> + <emulator>/usr/bin/qemu</emulator> + <disk type='block' device='disk'> + <source dev='/dev/HostVG/QEMUGuest1'/> + <target dev='hda' bus='ide'/> + <address type='drive' controller='0' bus='0' target='0' unit='0'/> + </disk> + <controller type='ide' index='0'> + <address type='pci' domain='0x0000' bus='0x00' slot='0x01' function='0x1'/> + </controller> + <controller type='usb' index='0'> + <address type='pci' domain='0x0000' bus='0x00' slot='0x01' function='0x2'/> + </controller> + <controller type='pci' index='0' model='pci-root'/> + <input type='mouse' bus='ps2'/> + <input type='keyboard' bus='ps2'/> + <memballoon model='virtio'> + <address type='pci' domain='0x0000' bus='0x00' slot='0x03' function='0x0'/> + </memballoon> + <memory model='nvdimm' access='private'> + <source> + <path>/tmp/nvdimm</path> + </source> + <target> + <size unit='KiB'>523264</size> + <node>0</node> + </target> + <address type='dimm' slot='0'/> + </memory> + </devices> +</domain> diff --git a/tests/qemuxml2xmloutdata/qemuxml2xmlout-memory-hotplug-nvdimm-access.xml b/tests/qemuxml2xmloutdata/qemuxml2xmlout-memory-hotplug-nvdimm-access.xml new file mode 120000 index 000000000..4b0496ad5 --- /dev/null +++ b/tests/qemuxml2xmloutdata/qemuxml2xmlout-memory-hotplug-nvdimm-access.xml @@ -0,0 +1 @@ +/home/zippy/work/libvirt/libvirt.git/tests/qemuxml2argvdata/qemuxml2argv-memory-hotplug-nvdimm-access.xml \ No newline at end of file diff --git a/tests/qemuxml2xmltest.c b/tests/qemuxml2xmltest.c index e1c341dd5..ef49a79ca 100644 --- a/tests/qemuxml2xmltest.c +++ b/tests/qemuxml2xmltest.c @@ -1079,6 +1079,7 @@ mymain(void) DO_TEST("memory-hotplug-nonuma", NONE); DO_TEST("memory-hotplug-dimm", NONE); DO_TEST("memory-hotplug-nvdimm", NONE); + DO_TEST("memory-hotplug-nvdimm-access", NONE); DO_TEST("net-udp", NONE);
DO_TEST("video-virtio-gpu-device", NONE);

On 03/07/2017 05:23 PM, John Ferlan wrote:
On 02/27/2017 08:19 AM, Michal Privoznik wrote:
Now that NVDIMM has found its way into libvirt, users might want to fine tune some settings for each module separately. One such setting is 'share=on|off' for the memory-backend-file object. This setting - just like its name suggest already - enables sharing the nvdimm module with other applications. Under the hood it controls whether qemu mmaps() the file as MAP_PRIVATE or MAP_SHARED.
Yet again, we have such config knob in domain XML, but it's just an attribute to numa <cell/>. This does not give fine enough tuning on per-memdevice basis so we need to have the attribute for each device too.
Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- docs/formatdomain.html.in | 15 +++++- docs/schemas/domaincommon.rng | 8 ++++ src/conf/domain_conf.c | 15 +++++- src/conf/domain_conf.h | 2 + .../qemuxml2argv-memory-hotplug-nvdimm-access.xml | 56 ++++++++++++++++++++++ ...qemuxml2xmlout-memory-hotplug-nvdimm-access.xml | 1 + tests/qemuxml2xmltest.c | 1 + 7 files changed, 95 insertions(+), 3 deletions(-) create mode 100644 tests/qemuxml2argvdata/qemuxml2argv-memory-hotplug-nvdimm-access.xml create mode 120000 tests/qemuxml2xmloutdata/qemuxml2xmlout-memory-hotplug-nvdimm-access.xml
diff --git a/docs/formatdomain.html.in b/docs/formatdomain.html.in index b76905cdc..00c0df2ce 100644 --- a/docs/formatdomain.html.in +++ b/docs/formatdomain.html.in @@ -1406,7 +1406,7 @@ <span class='since'>Since 1.2.9</span> the optional attribute <code>memAccess</code> can control whether the memory is to be mapped as "shared" or "private". This is valid only for - hugepages-backed memory. + hugepages-backed memory and nvdimm modules. </p>
<p> @@ -7015,7 +7015,7 @@ qemu-kvm -net nic,model=? /dev/null <pre> ... <devices> - <memory model='dimm'> + <memory model='dimm' access='private'>
Hmmm... this type of change almost makes it seem as though access will be required or written out always now.
<target> <size unit='KiB'>524287</size> <node>0</node> @@ -7054,6 +7054,17 @@ qemu-kvm -net nic,model=? /dev/null </p> </dd>
+ <dt><code>access</code></dt> + <dd> + <p> + Then there is optional attribute <code>access</code>
s/Then there is optional/An optional
+ (<span class="since">Since 3.1.0</span>) that allows
It'll be "since 3.2.0" at the earliest (don't capitalize Since in the middle of a sentence)
+ uses to fine tune mapping of the memory on per module
s/allows uses to/provides the capability to/
+ basis. Values are the same as for numa <cell/>: + <code>shared</code> and <code>private</code>.
It's perhaps a nit, but I guess I just read the docs literally rather than interpretively... The "access mode='shared|private'" shows up in the "Memory Backing" section even though there's a NUMA Node Tuning section later. So, I would change "for numa <cell>" to include "Memory Backing" and a link to that, e.g.
<a href="#elementsMemoryBacking">Memory Backing</a>
within the text
+ </p> + </dd> + <dt><code>source</code></dt> <dd> <p> diff --git a/docs/schemas/domaincommon.rng b/docs/schemas/domaincommon.rng index fafd3e982..78195aae9 100644 --- a/docs/schemas/domaincommon.rng +++ b/docs/schemas/domaincommon.rng @@ -4740,6 +4740,14 @@ <value>nvdimm</value> </choice> </attribute> + <optional> + <attribute name="access"> + <choice> + <value>shared</value> + <value>private</value> + </choice> + </attribute> + </optional> <interleave> <optional> <ref name="memorydev-source"/> diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c index 4ffca7dc8..a31114cc7 100644 --- a/src/conf/domain_conf.c +++ b/src/conf/domain_conf.c @@ -13860,6 +13860,15 @@ virDomainMemoryDefParseXML(xmlNodePtr memdevNode, } VIR_FREE(tmp);
+ tmp = virXMLPropString(memdevNode, "access"); + if (tmp && + (def->access = virDomainMemoryAccessTypeFromString(tmp)) <= 0) { + virReportError(VIR_ERR_XML_ERROR, + _("invalid access mode '%s'"), tmp); + goto error; + } + VIR_FREE(tmp); + /* source */ if ((node = virXPathNode("./source", ctxt)) && virDomainMemorySourceDefParseXML(node, ctxt, def) < 0) @@ -22666,7 +22675,11 @@ virDomainMemoryDefFormat(virBufferPtr buf, { const char *model = virDomainMemoryModelTypeToString(def->model);
- virBufferAsprintf(buf, "<memory model='%s'>\n", model); + virBufferAsprintf(buf, "<memory model='%s'", model); + if (def->access) + virBufferAsprintf(buf, " access='%s'", + virDomainMemoryAccessTypeToString(def->access)); + virBufferAddLit(buf, ">\n"); virBufferAdjustIndent(buf, 2);
if (virDomainMemorySourceDefFormat(buf, def) < 0)
Me thinks this too would need some amount of ABI checks wouldn't it? That is virDomainMemoryDefCheckABIStability for each mem?
I don't think so. 'access' is an attribute of backend, not frontend. Therefore guest has no idea what mode is particular NVDIMM in. Thus it's not part of ABI. Michal

Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/qemu/qemu_command.c | 11 +++++++-- src/qemu/qemu_command.h | 1 + src/qemu/qemu_hotplug.c | 2 +- .../qemuxml2argv-memory-hotplug-nvdimm-access.args | 26 ++++++++++++++++++++++ tests/qemuxml2argvtest.c | 2 ++ 5 files changed, 39 insertions(+), 3 deletions(-) create mode 100644 tests/qemuxml2argvdata/qemuxml2argv-memory-hotplug-nvdimm-access.args diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c index 46a6ca9f0..287387055 100644 --- a/src/qemu/qemu_command.c +++ b/src/qemu/qemu_command.c @@ -3090,6 +3090,7 @@ qemuBuildControllerDevCommandLine(virCommandPtr cmd, * for default * @autoNodeset: fallback nodeset in case of automatic NUMA placement * @memPathReq: request memory-backend-file with specific mem-path + * @memAccessReq: specifically requested memAccess mode * @force: forcibly use one of the backends * * Creates a configuration object that represents memory backend of given guest @@ -3122,6 +3123,7 @@ qemuBuildMemoryBackendStr(virJSONValuePtr *backendProps, virBitmapPtr userNodeset, virBitmapPtr autoNodeset, const char *memPathReq, + virDomainMemoryAccess memAccessReq, bool force) { virDomainHugePagePtr master_hugepage = NULL; @@ -3154,6 +3156,9 @@ qemuBuildMemoryBackendStr(virJSONValuePtr *backendProps, memAccess = virDomainNumaGetNodeMemoryAccessMode(def->numa, guestNode); } + if (memAccessReq) + memAccess = memAccessReq; + if (virDomainNumatuneGetMode(def->numa, guestNode, &mode) < 0 && virDomainNumatuneGetMode(def->numa, -1, &mode) < 0) mode = VIR_DOMAIN_NUMATUNE_MEM_STRICT; @@ -3352,7 +3357,9 @@ qemuBuildMemoryCellBackendStr(virDomainDefPtr def, if ((rc = qemuBuildMemoryBackendStr(&props, &backendType, cfg, qemuCaps, def, cell, memsize, 0, NULL, - auto_nodeset, NULL, false)) < 0) + auto_nodeset, NULL, + VIR_DOMAIN_MEMORY_ACCESS_DEFAULT, + false)) < 0) goto cleanup; if (!(*backendStr = virQEMUBuildObjectCommandlineFromJSON(backendType, @@ -3394,7 +3401,7 @@ qemuBuildMemoryDimmBackendStr(virDomainMemoryDefPtr mem, if (qemuBuildMemoryBackendStr(&props, &backendType, cfg, qemuCaps, def, mem->targetNode, mem->size, mem->pagesize, mem->sourceNodes, auto_nodeset, mem->path, - true) < 0) + mem->access, true) < 0) goto cleanup; ret = virQEMUBuildObjectCommandlineFromJSON(backendType, alias, props); diff --git a/src/qemu/qemu_command.h b/src/qemu/qemu_command.h index e23930255..e32e1718d 100644 --- a/src/qemu/qemu_command.h +++ b/src/qemu/qemu_command.h @@ -135,6 +135,7 @@ int qemuBuildMemoryBackendStr(virJSONValuePtr *backendProps, virBitmapPtr userNodeset, virBitmapPtr autoNodeset, const char *memPath, + virDomainMemoryAccess memAccessReq, bool force); char *qemuBuildMemoryDeviceStr(virDomainMemoryDefPtr mem); diff --git a/src/qemu/qemu_hotplug.c b/src/qemu/qemu_hotplug.c index 51b87804d..f2299f66e 100644 --- a/src/qemu/qemu_hotplug.c +++ b/src/qemu/qemu_hotplug.c @@ -2219,7 +2219,7 @@ qemuDomainAttachMemory(virQEMUDriverPtr driver, if (qemuBuildMemoryBackendStr(&props, &backendType, cfg, priv->qemuCaps, vm->def, mem->targetNode, mem->size, mem->pagesize, mem->sourceNodes, NULL, - mem->path, true) < 0) + mem->path, mem->access, true) < 0) goto cleanup; if (virDomainMemoryInsert(vm->def, mem) < 0) { diff --git a/tests/qemuxml2argvdata/qemuxml2argv-memory-hotplug-nvdimm-access.args b/tests/qemuxml2argvdata/qemuxml2argv-memory-hotplug-nvdimm-access.args new file mode 100644 index 000000000..d398b2294 --- /dev/null +++ b/tests/qemuxml2argvdata/qemuxml2argv-memory-hotplug-nvdimm-access.args @@ -0,0 +1,26 @@ +LC_ALL=C \ +PATH=/bin \ +HOME=/home/test \ +USER=test \ +LOGNAME=test \ +QEMU_AUDIO_DRV=none \ +/usr/bin/qemu \ +-name QEMUGuest1 \ +-S \ +-machine pc,accel=tcg,nvdimm=on \ +-m size=219136k,slots=16,maxmem=1099511627776k \ +-smp 2,sockets=2,cores=1,threads=1 \ +-numa node,nodeid=0,cpus=0-1,mem=214 \ +-object memory-backend-file,id=memnvdimm0,prealloc=yes,mem-path=/tmp/nvdimm,\ +share=no,size=536870912 \ +-device nvdimm,node=0,memdev=memnvdimm0,id=nvdimm0,slot=0 \ +-uuid c7a5fdbd-edaf-9455-926a-d65c16db1809 \ +-nographic \ +-nodefaults \ +-monitor unix:/tmp/lib/domain--1-QEMUGuest1/monitor.sock,server,nowait \ +-no-acpi \ +-boot c \ +-usb \ +-drive file=/dev/HostVG/QEMUGuest1,format=raw,if=none,id=drive-ide0-0-0 \ +-device ide-drive,bus=ide.0,unit=0,drive=drive-ide0-0-0,id=ide0-0-0 \ +-device virtio-balloon-pci,id=balloon0,bus=pci.0,addr=0x3 diff --git a/tests/qemuxml2argvtest.c b/tests/qemuxml2argvtest.c index 05a51a389..97a2c55cd 100644 --- a/tests/qemuxml2argvtest.c +++ b/tests/qemuxml2argvtest.c @@ -2335,6 +2335,8 @@ mymain(void) QEMU_CAPS_OBJECT_MEMORY_RAM, QEMU_CAPS_OBJECT_MEMORY_FILE); DO_TEST("memory-hotplug-nvdimm", QEMU_CAPS_MACHINE_OPT, QEMU_CAPS_DEVICE_NVDIMM, QEMU_CAPS_NUMA, QEMU_CAPS_OBJECT_MEMORY_RAM, QEMU_CAPS_OBJECT_MEMORY_FILE); + DO_TEST("memory-hotplug-nvdimm-access", QEMU_CAPS_MACHINE_OPT, QEMU_CAPS_DEVICE_NVDIMM, + QEMU_CAPS_NUMA, QEMU_CAPS_OBJECT_MEMORY_RAM, QEMU_CAPS_OBJECT_MEMORY_FILE); DO_TEST("machine-aeskeywrap-on-caps", QEMU_CAPS_MACHINE_OPT, QEMU_CAPS_AES_KEY_WRAP, -- 2.11.0

On 02/27/2017 08:19 AM, Michal Privoznik wrote:
Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/qemu/qemu_command.c | 11 +++++++-- src/qemu/qemu_command.h | 1 + src/qemu/qemu_hotplug.c | 2 +- .../qemuxml2argv-memory-hotplug-nvdimm-access.args | 26 ++++++++++++++++++++++ tests/qemuxml2argvtest.c | 2 ++ 5 files changed, 39 insertions(+), 3 deletions(-) create mode 100644 tests/qemuxml2argvdata/qemuxml2argv-memory-hotplug-nvdimm-access.args
Umm - even more reason to pass 'mem' to qemuBuildMemoryBackendStr
diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c index 46a6ca9f0..287387055 100644 --- a/src/qemu/qemu_command.c +++ b/src/qemu/qemu_command.c @@ -3090,6 +3090,7 @@ qemuBuildControllerDevCommandLine(virCommandPtr cmd, * for default * @autoNodeset: fallback nodeset in case of automatic NUMA placement * @memPathReq: request memory-backend-file with specific mem-path + * @memAccessReq: specifically requested memAccess mode * @force: forcibly use one of the backends * * Creates a configuration object that represents memory backend of given guest @@ -3122,6 +3123,7 @@ qemuBuildMemoryBackendStr(virJSONValuePtr *backendProps, virBitmapPtr userNodeset, virBitmapPtr autoNodeset, const char *memPathReq, + virDomainMemoryAccess memAccessReq, bool force) { virDomainHugePagePtr master_hugepage = NULL; @@ -3154,6 +3156,9 @@ qemuBuildMemoryBackendStr(virJSONValuePtr *backendProps, memAccess = virDomainNumaGetNodeMemoryAccessMode(def->numa, guestNode); }
+ if (memAccessReq) + memAccess = memAccessReq; +
Does or could this overwrite what we just got if guestNode >= 0? and virDomainNumaGetNodeMemoryAccessMode returns something? Which takes precedence? Does the first check even matter if the memAccessReq is set? Could we be more specific on the condition, too... e.g. if (memAccessReq > VIR_DOMAIN_MEMORY_ACCESS_DEFAULT) John
if (virDomainNumatuneGetMode(def->numa, guestNode, &mode) < 0 && virDomainNumatuneGetMode(def->numa, -1, &mode) < 0) mode = VIR_DOMAIN_NUMATUNE_MEM_STRICT; @@ -3352,7 +3357,9 @@ qemuBuildMemoryCellBackendStr(virDomainDefPtr def,
if ((rc = qemuBuildMemoryBackendStr(&props, &backendType, cfg, qemuCaps, def, cell, memsize, 0, NULL, - auto_nodeset, NULL, false)) < 0) + auto_nodeset, NULL, + VIR_DOMAIN_MEMORY_ACCESS_DEFAULT, + false)) < 0) goto cleanup;
if (!(*backendStr = virQEMUBuildObjectCommandlineFromJSON(backendType, @@ -3394,7 +3401,7 @@ qemuBuildMemoryDimmBackendStr(virDomainMemoryDefPtr mem, if (qemuBuildMemoryBackendStr(&props, &backendType, cfg, qemuCaps, def, mem->targetNode, mem->size, mem->pagesize, mem->sourceNodes, auto_nodeset, mem->path, - true) < 0) + mem->access, true) < 0) goto cleanup;
ret = virQEMUBuildObjectCommandlineFromJSON(backendType, alias, props); diff --git a/src/qemu/qemu_command.h b/src/qemu/qemu_command.h index e23930255..e32e1718d 100644 --- a/src/qemu/qemu_command.h +++ b/src/qemu/qemu_command.h @@ -135,6 +135,7 @@ int qemuBuildMemoryBackendStr(virJSONValuePtr *backendProps, virBitmapPtr userNodeset, virBitmapPtr autoNodeset, const char *memPath, + virDomainMemoryAccess memAccessReq, bool force);
char *qemuBuildMemoryDeviceStr(virDomainMemoryDefPtr mem); diff --git a/src/qemu/qemu_hotplug.c b/src/qemu/qemu_hotplug.c index 51b87804d..f2299f66e 100644 --- a/src/qemu/qemu_hotplug.c +++ b/src/qemu/qemu_hotplug.c @@ -2219,7 +2219,7 @@ qemuDomainAttachMemory(virQEMUDriverPtr driver, if (qemuBuildMemoryBackendStr(&props, &backendType, cfg, priv->qemuCaps, vm->def, mem->targetNode, mem->size, mem->pagesize, mem->sourceNodes, NULL, - mem->path, true) < 0) + mem->path, mem->access, true) < 0) goto cleanup;
if (virDomainMemoryInsert(vm->def, mem) < 0) { diff --git a/tests/qemuxml2argvdata/qemuxml2argv-memory-hotplug-nvdimm-access.args b/tests/qemuxml2argvdata/qemuxml2argv-memory-hotplug-nvdimm-access.args new file mode 100644 index 000000000..d398b2294 --- /dev/null +++ b/tests/qemuxml2argvdata/qemuxml2argv-memory-hotplug-nvdimm-access.args @@ -0,0 +1,26 @@ +LC_ALL=C \ +PATH=/bin \ +HOME=/home/test \ +USER=test \ +LOGNAME=test \ +QEMU_AUDIO_DRV=none \ +/usr/bin/qemu \ +-name QEMUGuest1 \ +-S \ +-machine pc,accel=tcg,nvdimm=on \ +-m size=219136k,slots=16,maxmem=1099511627776k \ +-smp 2,sockets=2,cores=1,threads=1 \ +-numa node,nodeid=0,cpus=0-1,mem=214 \ +-object memory-backend-file,id=memnvdimm0,prealloc=yes,mem-path=/tmp/nvdimm,\ +share=no,size=536870912 \ +-device nvdimm,node=0,memdev=memnvdimm0,id=nvdimm0,slot=0 \ +-uuid c7a5fdbd-edaf-9455-926a-d65c16db1809 \ +-nographic \ +-nodefaults \ +-monitor unix:/tmp/lib/domain--1-QEMUGuest1/monitor.sock,server,nowait \ +-no-acpi \ +-boot c \ +-usb \ +-drive file=/dev/HostVG/QEMUGuest1,format=raw,if=none,id=drive-ide0-0-0 \ +-device ide-drive,bus=ide.0,unit=0,drive=drive-ide0-0-0,id=ide0-0-0 \ +-device virtio-balloon-pci,id=balloon0,bus=pci.0,addr=0x3 diff --git a/tests/qemuxml2argvtest.c b/tests/qemuxml2argvtest.c index 05a51a389..97a2c55cd 100644 --- a/tests/qemuxml2argvtest.c +++ b/tests/qemuxml2argvtest.c @@ -2335,6 +2335,8 @@ mymain(void) QEMU_CAPS_OBJECT_MEMORY_RAM, QEMU_CAPS_OBJECT_MEMORY_FILE); DO_TEST("memory-hotplug-nvdimm", QEMU_CAPS_MACHINE_OPT, QEMU_CAPS_DEVICE_NVDIMM, QEMU_CAPS_NUMA, QEMU_CAPS_OBJECT_MEMORY_RAM, QEMU_CAPS_OBJECT_MEMORY_FILE); + DO_TEST("memory-hotplug-nvdimm-access", QEMU_CAPS_MACHINE_OPT, QEMU_CAPS_DEVICE_NVDIMM, + QEMU_CAPS_NUMA, QEMU_CAPS_OBJECT_MEMORY_RAM, QEMU_CAPS_OBJECT_MEMORY_FILE);
DO_TEST("machine-aeskeywrap-on-caps", QEMU_CAPS_MACHINE_OPT, QEMU_CAPS_AES_KEY_WRAP,

On 03/07/2017 05:31 PM, John Ferlan wrote:
On 02/27/2017 08:19 AM, Michal Privoznik wrote:
Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/qemu/qemu_command.c | 11 +++++++-- src/qemu/qemu_command.h | 1 + src/qemu/qemu_hotplug.c | 2 +- .../qemuxml2argv-memory-hotplug-nvdimm-access.args | 26 ++++++++++++++++++++++ tests/qemuxml2argvtest.c | 2 ++ 5 files changed, 39 insertions(+), 3 deletions(-) create mode 100644 tests/qemuxml2argvdata/qemuxml2argv-memory-hotplug-nvdimm-access.args
Umm - even more reason to pass 'mem' to qemuBuildMemoryBackendStr
Yup. It makes patches more clean and readable now that I've switched to that. Thank you for "forcing" me to overcome my laziness, roll up sleeves and get the work done.
diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c index 46a6ca9f0..287387055 100644 --- a/src/qemu/qemu_command.c +++ b/src/qemu/qemu_command.c @@ -3090,6 +3090,7 @@ qemuBuildControllerDevCommandLine(virCommandPtr cmd, * for default * @autoNodeset: fallback nodeset in case of automatic NUMA placement * @memPathReq: request memory-backend-file with specific mem-path + * @memAccessReq: specifically requested memAccess mode * @force: forcibly use one of the backends * * Creates a configuration object that represents memory backend of given guest @@ -3122,6 +3123,7 @@ qemuBuildMemoryBackendStr(virJSONValuePtr *backendProps, virBitmapPtr userNodeset, virBitmapPtr autoNodeset, const char *memPathReq, + virDomainMemoryAccess memAccessReq, bool force) { virDomainHugePagePtr master_hugepage = NULL; @@ -3154,6 +3156,9 @@ qemuBuildMemoryBackendStr(virJSONValuePtr *backendProps, memAccess = virDomainNumaGetNodeMemoryAccessMode(def->numa, guestNode); }
+ if (memAccessReq) + memAccess = memAccessReq; +
Does or could this overwrite what we just got if guestNode >= 0? and virDomainNumaGetNodeMemoryAccessMode returns something? Which takes precedence? Does the first check even matter if the memAccessReq is set?
I think memAccessReq is the most specific, thus should have the highest level of importance. IOW, you can have a numa node in say 'private' mode but one specific NVDIMM module in it in 'shared' mode. Michal

For NVDIMM devices it is optionally possible to specify the size of internal storage for namespaces. Namespaces are a feature that allows users to partition the NVDIMM for different uses. Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- docs/formatdomain.html.in | 9 ++++ docs/schemas/domaincommon.rng | 7 +++ src/conf/domain_conf.c | 19 +++++++ src/conf/domain_conf.h | 1 + src/qemu/qemu_command.c | 3 ++ .../qemuxml2argv-memory-hotplug-nvdimm-label.args | 26 ++++++++++ .../qemuxml2argv-memory-hotplug-nvdimm-label.xml | 59 ++++++++++++++++++++++ tests/qemuxml2argvtest.c | 2 + .../qemuxml2xmlout-memory-hotplug-nvdimm-label.xml | 1 + tests/qemuxml2xmltest.c | 1 + 10 files changed, 128 insertions(+) create mode 100644 tests/qemuxml2argvdata/qemuxml2argv-memory-hotplug-nvdimm-label.args create mode 100644 tests/qemuxml2argvdata/qemuxml2argv-memory-hotplug-nvdimm-label.xml create mode 120000 tests/qemuxml2xmloutdata/qemuxml2xmlout-memory-hotplug-nvdimm-label.xml diff --git a/docs/formatdomain.html.in b/docs/formatdomain.html.in index 00c0df2ce..4a078b577 100644 --- a/docs/formatdomain.html.in +++ b/docs/formatdomain.html.in @@ -7038,6 +7038,9 @@ qemu-kvm -net nic,model=? /dev/null <target> <size unit='KiB'>524288</size> <node>1</node> + <label> + <size unit='KiB'>128</size> + </label> </target> </memory> </devices> @@ -7117,6 +7120,12 @@ qemu-kvm -net nic,model=? /dev/null attach the memory to. The element shall be used only if the guest has NUMA nodes configured. </p> + <p> + For NVDIMM type devices one can optionally use + <code>label</code> and its subelement <code>size</code> + to configure the size of namespaces label storage + within the NVDIMM module. + </p> </dd> </dl> diff --git a/docs/schemas/domaincommon.rng b/docs/schemas/domaincommon.rng index 78195aae9..aafc731f5 100644 --- a/docs/schemas/domaincommon.rng +++ b/docs/schemas/domaincommon.rng @@ -4800,6 +4800,13 @@ <ref name="unsignedInt"/> </element> </optional> + <optional> + <element name="label"> + <element name="size"> + <ref name="scaledInteger"/> + </element> + </element> + </optional> </interleave> </element> </define> diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c index a31114cc7..7840f8e02 100644 --- a/src/conf/domain_conf.c +++ b/src/conf/domain_conf.c @@ -13824,6 +13824,18 @@ virDomainMemoryTargetDefParseXML(xmlNodePtr node, &def->size, true, false) < 0) goto cleanup; + if (def->model == VIR_DOMAIN_MEMORY_MODEL_NVDIMM) { + if (virDomainParseMemory("./label/size", "./label/size/@unit", ctxt, + &def->labelsize, false, false) < 0) + goto cleanup; + + if (def->labelsize && def->labelsize < 128) { + virReportError(VIR_ERR_XML_ERROR, "%s", + _("nvdimm label must be at least 128KiB")); + goto cleanup; + } + } + ret = 0; cleanup: @@ -22663,6 +22675,13 @@ virDomainMemoryTargetDefFormat(virBufferPtr buf, virBufferAsprintf(buf, "<size unit='KiB'>%llu</size>\n", def->size); if (def->targetNode >= 0) virBufferAsprintf(buf, "<node>%d</node>\n", def->targetNode); + if (def->labelsize) { + virBufferAddLit(buf, "<label>\n"); + virBufferAdjustIndent(buf, 2); + virBufferAsprintf(buf, "<size unit='KiB'>%llu</size>\n", def->labelsize); + virBufferAdjustIndent(buf, -2); + virBufferAddLit(buf, "</label>\n"); + } virBufferAdjustIndent(buf, -2); virBufferAddLit(buf, "</target>\n"); diff --git a/src/conf/domain_conf.h b/src/conf/domain_conf.h index 87516ca69..0e68f5770 100644 --- a/src/conf/domain_conf.h +++ b/src/conf/domain_conf.h @@ -2013,6 +2013,7 @@ struct _virDomainMemoryDef { int model; /* virDomainMemoryModel */ int targetNode; unsigned long long size; /* kibibytes */ + unsigned long long labelsize; /* kibibytes; valid only for NVDIMM */ virDomainDeviceInfo info; }; diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c index 287387055..91ace8e38 100644 --- a/src/qemu/qemu_command.c +++ b/src/qemu/qemu_command.c @@ -3440,6 +3440,9 @@ qemuBuildMemoryDeviceStr(virDomainMemoryDefPtr mem) if (mem->targetNode >= 0) virBufferAsprintf(&buf, "node=%d,", mem->targetNode); + if (mem->labelsize) + virBufferAsprintf(&buf, "label-size=%llu,", mem->labelsize * 1024); + virBufferAsprintf(&buf, "memdev=mem%s,id=%s", mem->info.alias, mem->info.alias); diff --git a/tests/qemuxml2argvdata/qemuxml2argv-memory-hotplug-nvdimm-label.args b/tests/qemuxml2argvdata/qemuxml2argv-memory-hotplug-nvdimm-label.args new file mode 100644 index 000000000..8669f2d84 --- /dev/null +++ b/tests/qemuxml2argvdata/qemuxml2argv-memory-hotplug-nvdimm-label.args @@ -0,0 +1,26 @@ +LC_ALL=C \ +PATH=/bin \ +HOME=/home/test \ +USER=test \ +LOGNAME=test \ +QEMU_AUDIO_DRV=none \ +/usr/bin/qemu \ +-name QEMUGuest1 \ +-S \ +-machine pc,accel=tcg,nvdimm=on \ +-m size=219136k,slots=16,maxmem=1099511627776k \ +-smp 2,sockets=2,cores=1,threads=1 \ +-numa node,nodeid=0,cpus=0-1,mem=214 \ +-object memory-backend-file,id=memnvdimm0,prealloc=yes,mem-path=/tmp/nvdimm,\ +share=no,size=536870912 \ +-device nvdimm,node=0,label-size=131072,memdev=memnvdimm0,id=nvdimm0,slot=0 \ +-uuid c7a5fdbd-edaf-9455-926a-d65c16db1809 \ +-nographic \ +-nodefaults \ +-monitor unix:/tmp/lib/domain--1-QEMUGuest1/monitor.sock,server,nowait \ +-no-acpi \ +-boot c \ +-usb \ +-drive file=/dev/HostVG/QEMUGuest1,format=raw,if=none,id=drive-ide0-0-0 \ +-device ide-drive,bus=ide.0,unit=0,drive=drive-ide0-0-0,id=ide0-0-0 \ +-device virtio-balloon-pci,id=balloon0,bus=pci.0,addr=0x3 diff --git a/tests/qemuxml2argvdata/qemuxml2argv-memory-hotplug-nvdimm-label.xml b/tests/qemuxml2argvdata/qemuxml2argv-memory-hotplug-nvdimm-label.xml new file mode 100644 index 000000000..425385251 --- /dev/null +++ b/tests/qemuxml2argvdata/qemuxml2argv-memory-hotplug-nvdimm-label.xml @@ -0,0 +1,59 @@ +<domain type='qemu'> + <name>QEMUGuest1</name> + <uuid>c7a5fdbd-edaf-9455-926a-d65c16db1809</uuid> + <maxMemory slots='16' unit='KiB'>1099511627776</maxMemory> + <memory unit='KiB'>1267710</memory> + <currentMemory unit='KiB'>1267710</currentMemory> + <vcpu placement='static' cpuset='0-1'>2</vcpu> + <os> + <type arch='i686' machine='pc'>hvm</type> + <boot dev='hd'/> + </os> + <idmap> + <uid start='0' target='1000' count='10'/> + <gid start='0' target='1000' count='10'/> + </idmap> + <cpu> + <topology sockets='2' cores='1' threads='1'/> + <numa> + <cell id='0' cpus='0-1' memory='219136' unit='KiB'/> + </numa> + </cpu> + <clock offset='utc'/> + <on_poweroff>destroy</on_poweroff> + <on_reboot>restart</on_reboot> + <on_crash>destroy</on_crash> + <devices> + <emulator>/usr/bin/qemu</emulator> + <disk type='block' device='disk'> + <source dev='/dev/HostVG/QEMUGuest1'/> + <target dev='hda' bus='ide'/> + <address type='drive' controller='0' bus='0' target='0' unit='0'/> + </disk> + <controller type='ide' index='0'> + <address type='pci' domain='0x0000' bus='0x00' slot='0x01' function='0x1'/> + </controller> + <controller type='usb' index='0'> + <address type='pci' domain='0x0000' bus='0x00' slot='0x01' function='0x2'/> + </controller> + <controller type='pci' index='0' model='pci-root'/> + <input type='mouse' bus='ps2'/> + <input type='keyboard' bus='ps2'/> + <memballoon model='virtio'> + <address type='pci' domain='0x0000' bus='0x00' slot='0x03' function='0x0'/> + </memballoon> + <memory model='nvdimm' access='private'> + <source> + <path>/tmp/nvdimm</path> + </source> + <target> + <size unit='KiB'>523264</size> + <node>0</node> + <label> + <size unit='KiB'>128</size> + </label> + </target> + <address type='dimm' slot='0'/> + </memory> + </devices> +</domain> diff --git a/tests/qemuxml2argvtest.c b/tests/qemuxml2argvtest.c index 97a2c55cd..5c05556a3 100644 --- a/tests/qemuxml2argvtest.c +++ b/tests/qemuxml2argvtest.c @@ -2337,6 +2337,8 @@ mymain(void) QEMU_CAPS_NUMA, QEMU_CAPS_OBJECT_MEMORY_RAM, QEMU_CAPS_OBJECT_MEMORY_FILE); DO_TEST("memory-hotplug-nvdimm-access", QEMU_CAPS_MACHINE_OPT, QEMU_CAPS_DEVICE_NVDIMM, QEMU_CAPS_NUMA, QEMU_CAPS_OBJECT_MEMORY_RAM, QEMU_CAPS_OBJECT_MEMORY_FILE); + DO_TEST("memory-hotplug-nvdimm-label", QEMU_CAPS_MACHINE_OPT, QEMU_CAPS_DEVICE_NVDIMM, + QEMU_CAPS_NUMA, QEMU_CAPS_OBJECT_MEMORY_RAM, QEMU_CAPS_OBJECT_MEMORY_FILE); DO_TEST("machine-aeskeywrap-on-caps", QEMU_CAPS_MACHINE_OPT, QEMU_CAPS_AES_KEY_WRAP, diff --git a/tests/qemuxml2xmloutdata/qemuxml2xmlout-memory-hotplug-nvdimm-label.xml b/tests/qemuxml2xmloutdata/qemuxml2xmlout-memory-hotplug-nvdimm-label.xml new file mode 120000 index 000000000..e357ec582 --- /dev/null +++ b/tests/qemuxml2xmloutdata/qemuxml2xmlout-memory-hotplug-nvdimm-label.xml @@ -0,0 +1 @@ +/home/zippy/work/libvirt/libvirt.git/tests/qemuxml2argvdata/qemuxml2argv-memory-hotplug-nvdimm-label.xml \ No newline at end of file diff --git a/tests/qemuxml2xmltest.c b/tests/qemuxml2xmltest.c index ef49a79ca..bf62dbbb2 100644 --- a/tests/qemuxml2xmltest.c +++ b/tests/qemuxml2xmltest.c @@ -1080,6 +1080,7 @@ mymain(void) DO_TEST("memory-hotplug-dimm", NONE); DO_TEST("memory-hotplug-nvdimm", NONE); DO_TEST("memory-hotplug-nvdimm-access", NONE); + DO_TEST("memory-hotplug-nvdimm-label", NONE); DO_TEST("net-udp", NONE); DO_TEST("video-virtio-gpu-device", NONE); -- 2.11.0

On 02/27/2017 08:19 AM, Michal Privoznik wrote:
For NVDIMM devices it is optionally possible to specify the size of internal storage for namespaces. Namespaces are a feature that allows users to partition the NVDIMM for different uses.
Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- docs/formatdomain.html.in | 9 ++++ docs/schemas/domaincommon.rng | 7 +++ src/conf/domain_conf.c | 19 +++++++ src/conf/domain_conf.h | 1 + src/qemu/qemu_command.c | 3 ++ .../qemuxml2argv-memory-hotplug-nvdimm-label.args | 26 ++++++++++ .../qemuxml2argv-memory-hotplug-nvdimm-label.xml | 59 ++++++++++++++++++++++ tests/qemuxml2argvtest.c | 2 + .../qemuxml2xmlout-memory-hotplug-nvdimm-label.xml | 1 + tests/qemuxml2xmltest.c | 1 + 10 files changed, 128 insertions(+) create mode 100644 tests/qemuxml2argvdata/qemuxml2argv-memory-hotplug-nvdimm-label.args create mode 100644 tests/qemuxml2argvdata/qemuxml2argv-memory-hotplug-nvdimm-label.xml create mode 120000 tests/qemuxml2xmloutdata/qemuxml2xmlout-memory-hotplug-nvdimm-label.xml
diff --git a/docs/formatdomain.html.in b/docs/formatdomain.html.in index 00c0df2ce..4a078b577 100644 --- a/docs/formatdomain.html.in +++ b/docs/formatdomain.html.in @@ -7038,6 +7038,9 @@ qemu-kvm -net nic,model=? /dev/null <target> <size unit='KiB'>524288</size> <node>1</node> + <label> + <size unit='KiB'>128</size> + </label> </target> </memory> </devices> @@ -7117,6 +7120,12 @@ qemu-kvm -net nic,model=? /dev/null attach the memory to. The element shall be used only if the guest has NUMA nodes configured. </p> + <p> + For NVDIMM type devices one can optionally use + <code>label</code> and its subelement <code>size</code> + to configure the size of namespaces label storage + within the NVDIMM module.
and the "unit=" is also required, right?
+ </p> </dd> </dl>
diff --git a/docs/schemas/domaincommon.rng b/docs/schemas/domaincommon.rng index 78195aae9..aafc731f5 100644 --- a/docs/schemas/domaincommon.rng +++ b/docs/schemas/domaincommon.rng @@ -4800,6 +4800,13 @@ <ref name="unsignedInt"/> </element> </optional> + <optional> + <element name="label"> + <element name="size"> + <ref name="scaledInteger"/> + </element> + </element> + </optional> </interleave> </element> </define> diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c index a31114cc7..7840f8e02 100644 --- a/src/conf/domain_conf.c +++ b/src/conf/domain_conf.c @@ -13824,6 +13824,18 @@ virDomainMemoryTargetDefParseXML(xmlNodePtr node, &def->size, true, false) < 0) goto cleanup;
+ if (def->model == VIR_DOMAIN_MEMORY_MODEL_NVDIMM) { + if (virDomainParseMemory("./label/size", "./label/size/@unit", ctxt, + &def->labelsize, false, false) < 0)
So one could provide a fairly large size for this? Why is '@required' false?
+ goto cleanup; + + if (def->labelsize && def->labelsize < 128) {
This makes it seem labelsize is optional, but it's not clear (to me at least) from the description above whether must provide the size as well as the label. Of course as I read on - labelsize is expected.... Let's face it, if label is provided and labelsize is 0, then well not much is going to be allowed is it?
+ virReportError(VIR_ERR_XML_ERROR, "%s", + _("nvdimm label must be at least 128KiB")); + goto cleanup; + } + } + ret = 0;
cleanup: @@ -22663,6 +22675,13 @@ virDomainMemoryTargetDefFormat(virBufferPtr buf, virBufferAsprintf(buf, "<size unit='KiB'>%llu</size>\n", def->size); if (def->targetNode >= 0) virBufferAsprintf(buf, "<node>%d</node>\n", def->targetNode); + if (def->labelsize) { + virBufferAddLit(buf, "<label>\n"); + virBufferAdjustIndent(buf, 2); + virBufferAsprintf(buf, "<size unit='KiB'>%llu</size>\n", def->labelsize);
There's no check here if labelsize wasn't provided.
+ virBufferAdjustIndent(buf, -2); + virBufferAddLit(buf, "</label>\n"); + }
virBufferAdjustIndent(buf, -2); virBufferAddLit(buf, "</target>\n");
Similar comments from before about ABI... Additionally if NVDIMM's are included in the total memory allocation's (from my comments in patch2), wouldn't the size of this label need to also be included?
diff --git a/src/conf/domain_conf.h b/src/conf/domain_conf.h index 87516ca69..0e68f5770 100644 --- a/src/conf/domain_conf.h +++ b/src/conf/domain_conf.h @@ -2013,6 +2013,7 @@ struct _virDomainMemoryDef { int model; /* virDomainMemoryModel */ int targetNode; unsigned long long size; /* kibibytes */ + unsigned long long labelsize; /* kibibytes; valid only for NVDIMM */
virDomainDeviceInfo info; }; diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c index 287387055..91ace8e38 100644 --- a/src/qemu/qemu_command.c +++ b/src/qemu/qemu_command.c @@ -3440,6 +3440,9 @@ qemuBuildMemoryDeviceStr(virDomainMemoryDefPtr mem) if (mem->targetNode >= 0) virBufferAsprintf(&buf, "node=%d,", mem->targetNode);
+ if (mem->labelsize) + virBufferAsprintf(&buf, "label-size=%llu,", mem->labelsize * 1024); +
And this code checks for labelsize using the "assumption" (I suppose that if label, then we have a labelsize too. John
virBufferAsprintf(&buf, "memdev=mem%s,id=%s", mem->info.alias, mem->info.alias);
diff --git a/tests/qemuxml2argvdata/qemuxml2argv-memory-hotplug-nvdimm-label.args b/tests/qemuxml2argvdata/qemuxml2argv-memory-hotplug-nvdimm-label.args new file mode 100644 index 000000000..8669f2d84 --- /dev/null +++ b/tests/qemuxml2argvdata/qemuxml2argv-memory-hotplug-nvdimm-label.args @@ -0,0 +1,26 @@ +LC_ALL=C \ +PATH=/bin \ +HOME=/home/test \ +USER=test \ +LOGNAME=test \ +QEMU_AUDIO_DRV=none \ +/usr/bin/qemu \ +-name QEMUGuest1 \ +-S \ +-machine pc,accel=tcg,nvdimm=on \ +-m size=219136k,slots=16,maxmem=1099511627776k \ +-smp 2,sockets=2,cores=1,threads=1 \ +-numa node,nodeid=0,cpus=0-1,mem=214 \ +-object memory-backend-file,id=memnvdimm0,prealloc=yes,mem-path=/tmp/nvdimm,\ +share=no,size=536870912 \ +-device nvdimm,node=0,label-size=131072,memdev=memnvdimm0,id=nvdimm0,slot=0 \ +-uuid c7a5fdbd-edaf-9455-926a-d65c16db1809 \ +-nographic \ +-nodefaults \ +-monitor unix:/tmp/lib/domain--1-QEMUGuest1/monitor.sock,server,nowait \ +-no-acpi \ +-boot c \ +-usb \ +-drive file=/dev/HostVG/QEMUGuest1,format=raw,if=none,id=drive-ide0-0-0 \ +-device ide-drive,bus=ide.0,unit=0,drive=drive-ide0-0-0,id=ide0-0-0 \ +-device virtio-balloon-pci,id=balloon0,bus=pci.0,addr=0x3 diff --git a/tests/qemuxml2argvdata/qemuxml2argv-memory-hotplug-nvdimm-label.xml b/tests/qemuxml2argvdata/qemuxml2argv-memory-hotplug-nvdimm-label.xml new file mode 100644 index 000000000..425385251 --- /dev/null +++ b/tests/qemuxml2argvdata/qemuxml2argv-memory-hotplug-nvdimm-label.xml @@ -0,0 +1,59 @@ +<domain type='qemu'> + <name>QEMUGuest1</name> + <uuid>c7a5fdbd-edaf-9455-926a-d65c16db1809</uuid> + <maxMemory slots='16' unit='KiB'>1099511627776</maxMemory> + <memory unit='KiB'>1267710</memory> + <currentMemory unit='KiB'>1267710</currentMemory> + <vcpu placement='static' cpuset='0-1'>2</vcpu> + <os> + <type arch='i686' machine='pc'>hvm</type> + <boot dev='hd'/> + </os> + <idmap> + <uid start='0' target='1000' count='10'/> + <gid start='0' target='1000' count='10'/> + </idmap> + <cpu> + <topology sockets='2' cores='1' threads='1'/> + <numa> + <cell id='0' cpus='0-1' memory='219136' unit='KiB'/> + </numa> + </cpu> + <clock offset='utc'/> + <on_poweroff>destroy</on_poweroff> + <on_reboot>restart</on_reboot> + <on_crash>destroy</on_crash> + <devices> + <emulator>/usr/bin/qemu</emulator> + <disk type='block' device='disk'> + <source dev='/dev/HostVG/QEMUGuest1'/> + <target dev='hda' bus='ide'/> + <address type='drive' controller='0' bus='0' target='0' unit='0'/> + </disk> + <controller type='ide' index='0'> + <address type='pci' domain='0x0000' bus='0x00' slot='0x01' function='0x1'/> + </controller> + <controller type='usb' index='0'> + <address type='pci' domain='0x0000' bus='0x00' slot='0x01' function='0x2'/> + </controller> + <controller type='pci' index='0' model='pci-root'/> + <input type='mouse' bus='ps2'/> + <input type='keyboard' bus='ps2'/> + <memballoon model='virtio'> + <address type='pci' domain='0x0000' bus='0x00' slot='0x03' function='0x0'/> + </memballoon> + <memory model='nvdimm' access='private'> + <source> + <path>/tmp/nvdimm</path> + </source> + <target> + <size unit='KiB'>523264</size> + <node>0</node> + <label> + <size unit='KiB'>128</size> + </label> + </target> + <address type='dimm' slot='0'/> + </memory> + </devices> +</domain> diff --git a/tests/qemuxml2argvtest.c b/tests/qemuxml2argvtest.c index 97a2c55cd..5c05556a3 100644 --- a/tests/qemuxml2argvtest.c +++ b/tests/qemuxml2argvtest.c @@ -2337,6 +2337,8 @@ mymain(void) QEMU_CAPS_NUMA, QEMU_CAPS_OBJECT_MEMORY_RAM, QEMU_CAPS_OBJECT_MEMORY_FILE); DO_TEST("memory-hotplug-nvdimm-access", QEMU_CAPS_MACHINE_OPT, QEMU_CAPS_DEVICE_NVDIMM, QEMU_CAPS_NUMA, QEMU_CAPS_OBJECT_MEMORY_RAM, QEMU_CAPS_OBJECT_MEMORY_FILE); + DO_TEST("memory-hotplug-nvdimm-label", QEMU_CAPS_MACHINE_OPT, QEMU_CAPS_DEVICE_NVDIMM, + QEMU_CAPS_NUMA, QEMU_CAPS_OBJECT_MEMORY_RAM, QEMU_CAPS_OBJECT_MEMORY_FILE);
DO_TEST("machine-aeskeywrap-on-caps", QEMU_CAPS_MACHINE_OPT, QEMU_CAPS_AES_KEY_WRAP, diff --git a/tests/qemuxml2xmloutdata/qemuxml2xmlout-memory-hotplug-nvdimm-label.xml b/tests/qemuxml2xmloutdata/qemuxml2xmlout-memory-hotplug-nvdimm-label.xml new file mode 120000 index 000000000..e357ec582 --- /dev/null +++ b/tests/qemuxml2xmloutdata/qemuxml2xmlout-memory-hotplug-nvdimm-label.xml @@ -0,0 +1 @@ +/home/zippy/work/libvirt/libvirt.git/tests/qemuxml2argvdata/qemuxml2argv-memory-hotplug-nvdimm-label.xml \ No newline at end of file diff --git a/tests/qemuxml2xmltest.c b/tests/qemuxml2xmltest.c index ef49a79ca..bf62dbbb2 100644 --- a/tests/qemuxml2xmltest.c +++ b/tests/qemuxml2xmltest.c @@ -1080,6 +1080,7 @@ mymain(void) DO_TEST("memory-hotplug-dimm", NONE); DO_TEST("memory-hotplug-nvdimm", NONE); DO_TEST("memory-hotplug-nvdimm-access", NONE); + DO_TEST("memory-hotplug-nvdimm-label", NONE); DO_TEST("net-udp", NONE);
DO_TEST("video-virtio-gpu-device", NONE);

On 03/07/2017 05:55 PM, John Ferlan wrote:
On 02/27/2017 08:19 AM, Michal Privoznik wrote:
For NVDIMM devices it is optionally possible to specify the size of internal storage for namespaces. Namespaces are a feature that allows users to partition the NVDIMM for different uses.
Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- docs/formatdomain.html.in | 9 ++++ docs/schemas/domaincommon.rng | 7 +++ src/conf/domain_conf.c | 19 +++++++ src/conf/domain_conf.h | 1 + src/qemu/qemu_command.c | 3 ++ .../qemuxml2argv-memory-hotplug-nvdimm-label.args | 26 ++++++++++ .../qemuxml2argv-memory-hotplug-nvdimm-label.xml | 59 ++++++++++++++++++++++ tests/qemuxml2argvtest.c | 2 + .../qemuxml2xmlout-memory-hotplug-nvdimm-label.xml | 1 + tests/qemuxml2xmltest.c | 1 + 10 files changed, 128 insertions(+) create mode 100644 tests/qemuxml2argvdata/qemuxml2argv-memory-hotplug-nvdimm-label.args create mode 100644 tests/qemuxml2argvdata/qemuxml2argv-memory-hotplug-nvdimm-label.xml create mode 120000 tests/qemuxml2xmloutdata/qemuxml2xmlout-memory-hotplug-nvdimm-label.xml
diff --git a/docs/formatdomain.html.in b/docs/formatdomain.html.in index 00c0df2ce..4a078b577 100644 --- a/docs/formatdomain.html.in +++ b/docs/formatdomain.html.in @@ -7038,6 +7038,9 @@ qemu-kvm -net nic,model=? /dev/null <target> <size unit='KiB'>524288</size> <node>1</node> + <label> + <size unit='KiB'>128</size> + </label> </target> </memory> </devices> @@ -7117,6 +7120,12 @@ qemu-kvm -net nic,model=? /dev/null attach the memory to. The element shall be used only if the guest has NUMA nodes configured. </p> + <p> + For NVDIMM type devices one can optionally use + <code>label</code> and its subelement <code>size</code> + to configure the size of namespaces label storage + within the NVDIMM module.
and the "unit=" is also required, right?
No. By default the unit is KiB. If users want to use different one, they have to specify it in @unit attribute. This is just like domain memory. That's why we can use the same parsing function.
+ </p> </dd> </dl>
diff --git a/docs/schemas/domaincommon.rng b/docs/schemas/domaincommon.rng index 78195aae9..aafc731f5 100644 --- a/docs/schemas/domaincommon.rng +++ b/docs/schemas/domaincommon.rng @@ -4800,6 +4800,13 @@ <ref name="unsignedInt"/> </element> </optional> + <optional> + <element name="label"> + <element name="size"> + <ref name="scaledInteger"/> + </element> + </element> + </optional> </interleave> </element> </define> diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c index a31114cc7..7840f8e02 100644 --- a/src/conf/domain_conf.c +++ b/src/conf/domain_conf.c @@ -13824,6 +13824,18 @@ virDomainMemoryTargetDefParseXML(xmlNodePtr node, &def->size, true, false) < 0) goto cleanup;
+ if (def->model == VIR_DOMAIN_MEMORY_MODEL_NVDIMM) { + if (virDomainParseMemory("./label/size", "./label/size/@unit", ctxt, + &def->labelsize, false, false) < 0)
So one could provide a fairly large size for this?
Yeah, if they have enough (disk/actual nvdimm) space. Now that I'm thinking about this, maybe we can at least check if the label-size is not greater than the size of nvdimm itself.
Why is '@required' false?
Because it is not required to specify label size.
+ goto cleanup; + + if (def->labelsize && def->labelsize < 128) {
This makes it seem labelsize is optional, but it's not clear (to me at least) from the description above whether must provide the size as well as the label.
Of course as I read on - labelsize is expected.... Let's face it, if label is provided and labelsize is 0, then well not much is going to be allowed is it?
I think you can still use it as a data storage inside the guest. Labels were not introduced in qemu at the same time as NVDIMM, so clearly you can use NVDIMMs even without labels.
+ virReportError(VIR_ERR_XML_ERROR, "%s", + _("nvdimm label must be at least 128KiB")); + goto cleanup; + } + } + ret = 0;
cleanup: @@ -22663,6 +22675,13 @@ virDomainMemoryTargetDefFormat(virBufferPtr buf, virBufferAsprintf(buf, "<size unit='KiB'>%llu</size>\n", def->size); if (def->targetNode >= 0) virBufferAsprintf(buf, "<node>%d</node>\n", def->targetNode); + if (def->labelsize) { + virBufferAddLit(buf, "<label>\n"); + virBufferAdjustIndent(buf, 2); + virBufferAsprintf(buf, "<size unit='KiB'>%llu</size>\n", def->labelsize);
There's no check here if labelsize wasn't provided.
Yes, there is.
+ virBufferAdjustIndent(buf, -2); + virBufferAddLit(buf, "</label>\n"); + }
virBufferAdjustIndent(buf, -2); virBufferAddLit(buf, "</target>\n");
Similar comments from before about ABI...
This might actually require ABI check as the label-size is visible inside the guest. Will update it.
Additionally if NVDIMM's are included in the total memory allocation's (from my comments in patch2), wouldn't the size of this label need to also be included?
No. This is not some additional memory. Label size specifies the size of a portion of NVDIMM that is used to store labels. For instance, if I have 512MiB NVDIMM and set label-size to 2MiB, there's only 510MiB available for data. If the label-size is 128MiB then there's only 128MiB left for data.
diff --git a/src/conf/domain_conf.h b/src/conf/domain_conf.h index 87516ca69..0e68f5770 100644 --- a/src/conf/domain_conf.h +++ b/src/conf/domain_conf.h @@ -2013,6 +2013,7 @@ struct _virDomainMemoryDef { int model; /* virDomainMemoryModel */ int targetNode; unsigned long long size; /* kibibytes */ + unsigned long long labelsize; /* kibibytes; valid only for NVDIMM */
virDomainDeviceInfo info; }; diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c index 287387055..91ace8e38 100644 --- a/src/qemu/qemu_command.c +++ b/src/qemu/qemu_command.c @@ -3440,6 +3440,9 @@ qemuBuildMemoryDeviceStr(virDomainMemoryDefPtr mem) if (mem->targetNode >= 0) virBufferAsprintf(&buf, "node=%d,", mem->targetNode);
+ if (mem->labelsize) + virBufferAsprintf(&buf, "label-size=%llu,", mem->labelsize * 1024); +
And this code checks for labelsize using the "assumption" (I suppose that if label, then we have a labelsize too.
No. If mem->labelsize then mem->labelsize. Or maybe I'm misunderstanding something? Michal

When domain is being started up, we ought to relabel the host side of NVDIMM so qemu has access to it. Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/security/security_dac.c | 73 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 73 insertions(+) diff --git a/src/security/security_dac.c b/src/security/security_dac.c index 67219170c..b8601faa5 100644 --- a/src/security/security_dac.c +++ b/src/security/security_dac.c @@ -1386,6 +1386,30 @@ virSecurityDACRestoreInputLabel(virSecurityManagerPtr mgr, } +static int +virSecurityDACRestoreMemoryLabel(virSecurityManagerPtr mgr, + virDomainDefPtr def ATTRIBUTE_UNUSED, + virDomainMemoryDefPtr mem) +{ + virSecurityDACDataPtr priv = virSecurityManagerGetPrivateData(mgr); + int ret = -1; + + switch ((virDomainMemoryModel) mem->model) { + case VIR_DOMAIN_MEMORY_MODEL_NVDIMM: + ret = virSecurityDACRestoreFileLabel(priv, mem->path); + break; + + case VIR_DOMAIN_MEMORY_MODEL_DIMM: + case VIR_DOMAIN_MEMORY_MODEL_LAST: + case VIR_DOMAIN_MEMORY_MODEL_NONE: + ret = 0; + break; + } + + return ret; +} + + static int virSecurityDACRestoreAllLabel(virSecurityManagerPtr mgr, virDomainDefPtr def, @@ -1425,6 +1449,13 @@ virSecurityDACRestoreAllLabel(virSecurityManagerPtr mgr, rc = -1; } + for (i = 0; i < def->nmems; i++) { + if (virSecurityDACRestoreMemoryLabel(mgr, + def, + def->mems[i]) < 0) + rc = -1; + } + if (virDomainChrDefForeach(def, false, virSecurityDACRestoreChardevCallback, @@ -1457,6 +1488,41 @@ virSecurityDACSetChardevCallback(virDomainDefPtr def, } +static int +virSecurityDACSetMemoryLabel(virSecurityManagerPtr mgr, + virDomainDefPtr def, + virDomainMemoryDefPtr mem) + +{ + virSecurityDACDataPtr priv = virSecurityManagerGetPrivateData(mgr); + virSecurityLabelDefPtr seclabel; + int ret = -1; + uid_t user; + gid_t group; + + seclabel = virDomainDefGetSecurityLabelDef(def, SECURITY_DAC_NAME); + if (seclabel && !seclabel->relabel) + return 0; + + switch ((virDomainMemoryModel) mem->model) { + case VIR_DOMAIN_MEMORY_MODEL_NVDIMM: + if (virSecurityDACGetIds(seclabel, priv, &user, &group, NULL, NULL) < 0) + return -1; + + ret = virSecurityDACSetOwnership(priv, NULL, mem->path, user, group); + break; + + case VIR_DOMAIN_MEMORY_MODEL_DIMM: + case VIR_DOMAIN_MEMORY_MODEL_LAST: + case VIR_DOMAIN_MEMORY_MODEL_NONE: + ret = 0; + break; + } + + return ret; +} + + static int virSecurityDACSetAllLabel(virSecurityManagerPtr mgr, virDomainDefPtr def, @@ -1496,6 +1562,13 @@ virSecurityDACSetAllLabel(virSecurityManagerPtr mgr, return -1; } + for (i = 0; i < def->nmems; i++) { + if (virSecurityDACSetMemoryLabel(mgr, + def, + def->mems[i]) < 0) + return -1; + } + if (virDomainChrDefForeach(def, true, virSecurityDACSetChardevCallback, -- 2.11.0

On 02/27/2017 08:19 AM, Michal Privoznik wrote:
When domain is being started up, we ought to relabel the host side of NVDIMM so qemu has access to it.
Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/security/security_dac.c | 73 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 73 insertions(+)
Why are the security patches not earlier? Before the command line is created?
diff --git a/src/security/security_dac.c b/src/security/security_dac.c index 67219170c..b8601faa5 100644 --- a/src/security/security_dac.c +++ b/src/security/security_dac.c @@ -1386,6 +1386,30 @@ virSecurityDACRestoreInputLabel(virSecurityManagerPtr mgr, }
+static int +virSecurityDACRestoreMemoryLabel(virSecurityManagerPtr mgr, + virDomainDefPtr def ATTRIBUTE_UNUSED, + virDomainMemoryDefPtr mem) +{ + virSecurityDACDataPtr priv = virSecurityManagerGetPrivateData(mgr); + int ret = -1; + + switch ((virDomainMemoryModel) mem->model) { + case VIR_DOMAIN_MEMORY_MODEL_NVDIMM: + ret = virSecurityDACRestoreFileLabel(priv, mem->path); + break; + + case VIR_DOMAIN_MEMORY_MODEL_DIMM: + case VIR_DOMAIN_MEMORY_MODEL_LAST: + case VIR_DOMAIN_MEMORY_MODEL_NONE: + ret = 0; + break; + } + + return ret; +} + + static int virSecurityDACRestoreAllLabel(virSecurityManagerPtr mgr, virDomainDefPtr def, @@ -1425,6 +1449,13 @@ virSecurityDACRestoreAllLabel(virSecurityManagerPtr mgr, rc = -1; }
+ for (i = 0; i < def->nmems; i++) { + if (virSecurityDACRestoreMemoryLabel(mgr, + def, + def->mems[i]) < 0) + rc = -1; + } + if (virDomainChrDefForeach(def, false, virSecurityDACRestoreChardevCallback, @@ -1457,6 +1488,41 @@ virSecurityDACSetChardevCallback(virDomainDefPtr def, }
+static int +virSecurityDACSetMemoryLabel(virSecurityManagerPtr mgr, + virDomainDefPtr def, + virDomainMemoryDefPtr mem) + +{ + virSecurityDACDataPtr priv = virSecurityManagerGetPrivateData(mgr); + virSecurityLabelDefPtr seclabel; + int ret = -1; + uid_t user; + gid_t group; + + seclabel = virDomainDefGetSecurityLabelDef(def, SECURITY_DAC_NAME); + if (seclabel && !seclabel->relabel) + return 0;
If this only matters for NVDIMM, then why not put this within the case? ACK in principle... John
+ + switch ((virDomainMemoryModel) mem->model) { + case VIR_DOMAIN_MEMORY_MODEL_NVDIMM: + if (virSecurityDACGetIds(seclabel, priv, &user, &group, NULL, NULL) < 0) + return -1; + + ret = virSecurityDACSetOwnership(priv, NULL, mem->path, user, group); + break; + + case VIR_DOMAIN_MEMORY_MODEL_DIMM: + case VIR_DOMAIN_MEMORY_MODEL_LAST: + case VIR_DOMAIN_MEMORY_MODEL_NONE: + ret = 0; + break; + } + + return ret; +} + + static int virSecurityDACSetAllLabel(virSecurityManagerPtr mgr, virDomainDefPtr def, @@ -1496,6 +1562,13 @@ virSecurityDACSetAllLabel(virSecurityManagerPtr mgr, return -1; }
+ for (i = 0; i < def->nmems; i++) { + if (virSecurityDACSetMemoryLabel(mgr, + def, + def->mems[i]) < 0) + return -1; + } + if (virDomainChrDefForeach(def, true, virSecurityDACSetChardevCallback,

On 03/07/2017 06:36 PM, John Ferlan wrote:
On 02/27/2017 08:19 AM, Michal Privoznik wrote:
When domain is being started up, we ought to relabel the host side of NVDIMM so qemu has access to it.
Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/security/security_dac.c | 73 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 73 insertions(+)
Why are the security patches not earlier? Before the command line is created?
I think it's just a matter of taste. If the file representing NVDIMM has right perms, it'll work even without this commit. Michal

When domain is being started up, we ought to relabel the host side of NVDIMM so qemu has access to it. Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/security/security_selinux.c | 66 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 66 insertions(+) diff --git a/src/security/security_selinux.c b/src/security/security_selinux.c index e22de0653..223442105 100644 --- a/src/security/security_selinux.c +++ b/src/security/security_selinux.c @@ -1381,6 +1381,62 @@ virSecuritySELinuxRestoreInputLabel(virSecurityManagerPtr mgr, } +static int +virSecuritySELinuxSetMemoryLabel(virSecurityManagerPtr mgr, + virDomainDefPtr def, + virDomainMemoryDefPtr mem) +{ + virSecurityLabelDefPtr seclabel; + + seclabel = virDomainDefGetSecurityLabelDef(def, SECURITY_SELINUX_NAME); + if (seclabel == NULL) + return 0; + + switch ((virDomainMemoryModel) mem->model) { + case VIR_DOMAIN_MEMORY_MODEL_NVDIMM: + if (virSecuritySELinuxSetFilecon(mgr, mem->path, + seclabel->imagelabel) < 0) + return -1; + break; + + case VIR_DOMAIN_MEMORY_MODEL_NONE: + case VIR_DOMAIN_MEMORY_MODEL_DIMM: + case VIR_DOMAIN_MEMORY_MODEL_LAST: + break; + } + + return 0; +} + + +static int +virSecuritySELinuxRestoreMemoryLabel(virSecurityManagerPtr mgr, + virDomainDefPtr def, + virDomainMemoryDefPtr mem) +{ + int ret = -1; + virSecurityLabelDefPtr seclabel; + + seclabel = virDomainDefGetSecurityLabelDef(def, SECURITY_SELINUX_NAME); + if (seclabel == NULL) + return 0; + + switch ((virDomainMemoryModel) mem->model) { + case VIR_DOMAIN_MEMORY_MODEL_NVDIMM: + ret = virSecuritySELinuxRestoreFileLabel(mgr, mem->path); + break; + + case VIR_DOMAIN_MEMORY_MODEL_DIMM: + case VIR_DOMAIN_MEMORY_MODEL_NONE: + case VIR_DOMAIN_MEMORY_MODEL_LAST: + ret = 0; + break; + } + + return ret; +} + + static int virSecuritySELinuxSetTPMFileLabel(virSecurityManagerPtr mgr, virDomainDefPtr def, @@ -2325,6 +2381,11 @@ virSecuritySELinuxRestoreAllLabel(virSecurityManagerPtr mgr, rc = -1; } + for (i = 0; i < def->nmems; i++) { + if (virSecuritySELinuxRestoreMemoryLabel(mgr, def, def->mems[i]) < 0) + return -1; + } + for (i = 0; i < def->ndisks; i++) { virDomainDiskDefPtr disk = def->disks[i]; @@ -2711,6 +2772,11 @@ virSecuritySELinuxSetAllLabel(virSecurityManagerPtr mgr, return -1; } + for (i = 0; i < def->nmems; i++) { + if (virSecuritySELinuxSetMemoryLabel(mgr, def, def->mems[i]) < 0) + return -1; + } + if (def->tpm) { if (virSecuritySELinuxSetTPMFileLabel(mgr, def, def->tpm) < 0) return -1; -- 2.11.0

On 02/27/2017 08:19 AM, Michal Privoznik wrote:
When domain is being started up, we ought to relabel the host side of NVDIMM so qemu has access to it.
Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/security/security_selinux.c | 66 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 66 insertions(+)
diff --git a/src/security/security_selinux.c b/src/security/security_selinux.c index e22de0653..223442105 100644 --- a/src/security/security_selinux.c +++ b/src/security/security_selinux.c @@ -1381,6 +1381,62 @@ virSecuritySELinuxRestoreInputLabel(virSecurityManagerPtr mgr, }
+static int +virSecuritySELinuxSetMemoryLabel(virSecurityManagerPtr mgr, + virDomainDefPtr def, + virDomainMemoryDefPtr mem) +{ + virSecurityLabelDefPtr seclabel; + + seclabel = virDomainDefGetSecurityLabelDef(def, SECURITY_SELINUX_NAME); + if (seclabel == NULL) + return 0;
Similar question to DAC - why not put the check in the NVDIMM case.
+ + switch ((virDomainMemoryModel) mem->model) { + case VIR_DOMAIN_MEMORY_MODEL_NVDIMM: + if (virSecuritySELinuxSetFilecon(mgr, mem->path, + seclabel->imagelabel) < 0) + return -1; + break; + + case VIR_DOMAIN_MEMORY_MODEL_NONE: + case VIR_DOMAIN_MEMORY_MODEL_DIMM: + case VIR_DOMAIN_MEMORY_MODEL_LAST: + break; + } + + return 0; +} + + +static int +virSecuritySELinuxRestoreMemoryLabel(virSecurityManagerPtr mgr, + virDomainDefPtr def, + virDomainMemoryDefPtr mem) +{ + int ret = -1; + virSecurityLabelDefPtr seclabel; + + seclabel = virDomainDefGetSecurityLabelDef(def, SECURITY_SELINUX_NAME); + if (seclabel == NULL) + return 0;
Same here... ACK in principle though John
+ + switch ((virDomainMemoryModel) mem->model) { + case VIR_DOMAIN_MEMORY_MODEL_NVDIMM: + ret = virSecuritySELinuxRestoreFileLabel(mgr, mem->path); + break; + + case VIR_DOMAIN_MEMORY_MODEL_DIMM: + case VIR_DOMAIN_MEMORY_MODEL_NONE: + case VIR_DOMAIN_MEMORY_MODEL_LAST: + ret = 0; + break; + } + + return ret; +} + + static int virSecuritySELinuxSetTPMFileLabel(virSecurityManagerPtr mgr, virDomainDefPtr def, @@ -2325,6 +2381,11 @@ virSecuritySELinuxRestoreAllLabel(virSecurityManagerPtr mgr, rc = -1; }
+ for (i = 0; i < def->nmems; i++) { + if (virSecuritySELinuxRestoreMemoryLabel(mgr, def, def->mems[i]) < 0) + return -1; + } + for (i = 0; i < def->ndisks; i++) { virDomainDiskDefPtr disk = def->disks[i];
@@ -2711,6 +2772,11 @@ virSecuritySELinuxSetAllLabel(virSecurityManagerPtr mgr, return -1; }
+ for (i = 0; i < def->nmems; i++) { + if (virSecuritySELinuxSetMemoryLabel(mgr, def, def->mems[i]) < 0) + return -1; + } + if (def->tpm) { if (virSecuritySELinuxSetTPMFileLabel(mgr, def, def->tpm) < 0) return -1;

These APIs will be used whenever we are hot (un-)plugging a memdev. Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/libvirt_private.syms | 2 ++ src/security/security_driver.h | 9 +++++++ src/security/security_manager.c | 56 +++++++++++++++++++++++++++++++++++++++++ src/security/security_manager.h | 7 ++++++ src/security/security_stack.c | 38 ++++++++++++++++++++++++++++ 5 files changed, 112 insertions(+) diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms index bce0487ab..8b87b5fd6 100644 --- a/src/libvirt_private.syms +++ b/src/libvirt_private.syms @@ -1170,6 +1170,7 @@ virSecurityManagerRestoreAllLabel; virSecurityManagerRestoreDiskLabel; virSecurityManagerRestoreHostdevLabel; virSecurityManagerRestoreImageLabel; +virSecurityManagerRestoreMemoryLabel; virSecurityManagerRestoreSavedStateLabel; virSecurityManagerSetAllLabel; virSecurityManagerSetChildProcessLabel; @@ -1178,6 +1179,7 @@ virSecurityManagerSetDiskLabel; virSecurityManagerSetHostdevLabel; virSecurityManagerSetImageFDLabel; virSecurityManagerSetImageLabel; +virSecurityManagerSetMemoryLabel; virSecurityManagerSetProcessLabel; virSecurityManagerSetSavedStateLabel; virSecurityManagerSetSocketLabel; diff --git a/src/security/security_driver.h b/src/security/security_driver.h index fa65eb359..0f5cce5f8 100644 --- a/src/security/security_driver.h +++ b/src/security/security_driver.h @@ -123,6 +123,12 @@ typedef int (*virSecurityDomainSetImageLabel) (virSecurityManagerPtr mgr, typedef int (*virSecurityDomainRestoreImageLabel) (virSecurityManagerPtr mgr, virDomainDefPtr def, virStorageSourcePtr src); +typedef int (*virSecurityDomainSetMemoryLabel) (virSecurityManagerPtr mgr, + virDomainDefPtr def, + virDomainMemoryDefPtr mem); +typedef int (*virSecurityDomainRestoreMemoryLabel) (virSecurityManagerPtr mgr, + virDomainDefPtr def, + virDomainMemoryDefPtr mem); typedef int (*virSecurityDomainSetPathLabel) (virSecurityManagerPtr mgr, virDomainDefPtr def, const char *path); @@ -152,6 +158,9 @@ struct _virSecurityDriver { virSecurityDomainSetImageLabel domainSetSecurityImageLabel; virSecurityDomainRestoreImageLabel domainRestoreSecurityImageLabel; + virSecurityDomainSetMemoryLabel domainSetSecurityMemoryLabel; + virSecurityDomainRestoreMemoryLabel domainRestoreSecurityMemoryLabel; + virSecurityDomainSetDaemonSocketLabel domainSetSecurityDaemonSocketLabel; virSecurityDomainSetSocketLabel domainSetSecuritySocketLabel; virSecurityDomainClearSocketLabel domainClearSecuritySocketLabel; diff --git a/src/security/security_manager.c b/src/security/security_manager.c index d8c6facc8..6c777db1e 100644 --- a/src/security/security_manager.c +++ b/src/security/security_manager.c @@ -1052,3 +1052,59 @@ virSecurityManagerDomainSetPathLabel(virSecurityManagerPtr mgr, return 0; } + + +/** + * virSecurityManagerSetMemoryLabel: + * @mgr: security manager object + * @vm: domain definition object + * @mem: memory module to operate on + * + * Labels the host part of a memory module. + * + * Returns: 0 on success, -1 on error. + */ +int +virSecurityManagerSetMemoryLabel(virSecurityManagerPtr mgr, + virDomainDefPtr vm, + virDomainMemoryDefPtr mem) +{ + if (mgr->drv->domainSetSecurityMemoryLabel) { + int ret; + virObjectLock(mgr); + ret = mgr->drv->domainSetSecurityMemoryLabel(mgr, vm, mem); + virObjectUnlock(mgr); + return ret; + } + + virReportUnsupportedError(); + return -1; +} + + +/** + * virSecurityManagerRestoreMemoryLabel: + * @mgr: security manager object + * @vm: domain definition object + * @mem: memory module to operate on + * + * Removes security label from the host part of a memory module. + * + * Returns: 0 on success, -1 on error. + */ +int +virSecurityManagerRestoreMemoryLabel(virSecurityManagerPtr mgr, + virDomainDefPtr vm, + virDomainMemoryDefPtr mem) +{ + if (mgr->drv->domainRestoreSecurityMemoryLabel) { + int ret; + virObjectLock(mgr); + ret = mgr->drv->domainRestoreSecurityMemoryLabel(mgr, vm, mem); + virObjectUnlock(mgr); + return ret; + } + + virReportUnsupportedError(); + return -1; +} diff --git a/src/security/security_manager.h b/src/security/security_manager.h index bae8493ee..238e66cd0 100644 --- a/src/security/security_manager.h +++ b/src/security/security_manager.h @@ -162,6 +162,13 @@ int virSecurityManagerRestoreImageLabel(virSecurityManagerPtr mgr, virDomainDefPtr vm, virStorageSourcePtr src); +int virSecurityManagerSetMemoryLabel(virSecurityManagerPtr mgr, + virDomainDefPtr vm, + virDomainMemoryDefPtr mem); +int virSecurityManagerRestoreMemoryLabel(virSecurityManagerPtr mgr, + virDomainDefPtr vm, + virDomainMemoryDefPtr mem); + int virSecurityManagerDomainSetPathLabel(virSecurityManagerPtr mgr, virDomainDefPtr vm, const char *path); diff --git a/src/security/security_stack.c b/src/security/security_stack.c index 6056ae321..b02ee18a8 100644 --- a/src/security/security_stack.c +++ b/src/security/security_stack.c @@ -627,6 +627,41 @@ virSecurityStackRestoreImageLabel(virSecurityManagerPtr mgr, return rc; } +static int +virSecurityStackSetMemoryLabel(virSecurityManagerPtr mgr, + virDomainDefPtr vm, + virDomainMemoryDefPtr mem) +{ + virSecurityStackDataPtr priv = virSecurityManagerGetPrivateData(mgr); + virSecurityStackItemPtr item = priv->itemsHead; + int rc = 0; + + for (; item; item = item->next) { + if (virSecurityManagerSetMemoryLabel(item->securityManager, vm, mem) < 0) + rc = -1; + } + + return rc; +} + +static int +virSecurityStackRestoreMemoryLabel(virSecurityManagerPtr mgr, + virDomainDefPtr vm, + virDomainMemoryDefPtr mem) +{ + virSecurityStackDataPtr priv = virSecurityManagerGetPrivateData(mgr); + virSecurityStackItemPtr item = priv->itemsHead; + int rc = 0; + + for (; item; item = item->next) { + if (virSecurityManagerRestoreMemoryLabel(item->securityManager, + vm, mem) < 0) + rc = -1; + } + + return rc; +} + static int virSecurityStackDomainSetPathLabel(virSecurityManagerPtr mgr, virDomainDefPtr vm, @@ -669,6 +704,9 @@ virSecurityDriver virSecurityDriverStack = { .domainSetSecurityImageLabel = virSecurityStackSetImageLabel, .domainRestoreSecurityImageLabel = virSecurityStackRestoreImageLabel, + .domainSetSecurityMemoryLabel = virSecurityStackSetMemoryLabel, + .domainRestoreSecurityMemoryLabel = virSecurityStackRestoreMemoryLabel, + .domainSetSecurityDaemonSocketLabel = virSecurityStackSetDaemonSocketLabel, .domainSetSecuritySocketLabel = virSecurityStackSetSocketLabel, .domainClearSecuritySocketLabel = virSecurityStackClearSocketLabel, -- 2.11.0

On 02/27/2017 08:19 AM, Michal Privoznik wrote:
These APIs will be used whenever we are hot (un-)plugging a memdev.
Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/libvirt_private.syms | 2 ++ src/security/security_driver.h | 9 +++++++ src/security/security_manager.c | 56 +++++++++++++++++++++++++++++++++++++++++ src/security/security_manager.h | 7 ++++++ src/security/security_stack.c | 38 ++++++++++++++++++++++++++++ 5 files changed, 112 insertions(+)
ACK - similar to other objects... John

Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/security/security_dac.c | 3 +++ src/security/security_nop.c | 19 +++++++++++++++++++ src/security/security_selinux.c | 3 +++ 3 files changed, 25 insertions(+) diff --git a/src/security/security_dac.c b/src/security/security_dac.c index b8601faa5..fb953f891 100644 --- a/src/security/security_dac.c +++ b/src/security/security_dac.c @@ -1975,6 +1975,9 @@ virSecurityDriver virSecurityDriverDAC = { .domainSetSecurityImageLabel = virSecurityDACSetImageLabel, .domainRestoreSecurityImageLabel = virSecurityDACRestoreImageLabel, + .domainSetSecurityMemoryLabel = virSecurityDACSetMemoryLabel, + .domainRestoreSecurityMemoryLabel = virSecurityDACRestoreMemoryLabel, + .domainSetSecurityDaemonSocketLabel = virSecurityDACSetDaemonSocketLabel, .domainSetSecuritySocketLabel = virSecurityDACSetSocketLabel, .domainClearSecuritySocketLabel = virSecurityDACClearSocketLabel, diff --git a/src/security/security_nop.c b/src/security/security_nop.c index 951125dce..0a9b51528 100644 --- a/src/security/security_nop.c +++ b/src/security/security_nop.c @@ -236,6 +236,22 @@ virSecurityDomainSetImageLabelNop(virSecurityManagerPtr mgr ATTRIBUTE_UNUSED, return 0; } +static int +virSecurityDomainSetMemoryLabelNop(virSecurityManagerPtr mgr ATTRIBUTE_UNUSED, + virDomainDefPtr def ATTRIBUTE_UNUSED, + virDomainMemoryDefPtr mem ATTRIBUTE_UNUSED) +{ + return 0; +} + +static int +virSecurityDomainRestoreMemoryLabelNop(virSecurityManagerPtr mgr ATTRIBUTE_UNUSED, + virDomainDefPtr def ATTRIBUTE_UNUSED, + virDomainMemoryDefPtr mem ATTRIBUTE_UNUSED) +{ + return 0; +} + virSecurityDriver virSecurityDriverNop = { .privateDataLen = 0, @@ -255,6 +271,9 @@ virSecurityDriver virSecurityDriverNop = { .domainSetSecurityImageLabel = virSecurityDomainSetImageLabelNop, .domainRestoreSecurityImageLabel = virSecurityDomainRestoreImageLabelNop, + .domainSetSecurityMemoryLabel = virSecurityDomainSetMemoryLabelNop, + .domainRestoreSecurityMemoryLabel = virSecurityDomainRestoreMemoryLabelNop, + .domainSetSecurityDaemonSocketLabel = virSecurityDomainSetDaemonSocketLabelNop, .domainSetSecuritySocketLabel = virSecurityDomainSetSocketLabelNop, .domainClearSecuritySocketLabel = virSecurityDomainClearSocketLabelNop, diff --git a/src/security/security_selinux.c b/src/security/security_selinux.c index 223442105..5c237a5fe 100644 --- a/src/security/security_selinux.c +++ b/src/security/security_selinux.c @@ -3007,6 +3007,9 @@ virSecurityDriver virSecurityDriverSELinux = { .domainSetSecurityImageLabel = virSecuritySELinuxSetImageLabel, .domainRestoreSecurityImageLabel = virSecuritySELinuxRestoreImageLabel, + .domainSetSecurityMemoryLabel = virSecuritySELinuxSetMemoryLabel, + .domainRestoreSecurityMemoryLabel = virSecuritySELinuxRestoreMemoryLabel, + .domainSetSecurityDaemonSocketLabel = virSecuritySELinuxSetDaemonSocketLabel, .domainSetSecuritySocketLabel = virSecuritySELinuxSetSocketLabel, .domainClearSecuritySocketLabel = virSecuritySELinuxClearSocketLabel, -- 2.11.0

On 02/27/2017 08:19 AM, Michal Privoznik wrote:
Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/security/security_dac.c | 3 +++ src/security/security_nop.c | 19 +++++++++++++++++++ src/security/security_selinux.c | 3 +++ 3 files changed, 25 insertions(+)
It would seem this should just be merged with patch 10 Or perhaps the security_stack.c altered in 10 should be moved here. Heck for that matter 9, 10, and 11 are all 'related'. For the purposes of what's needed for future patches to use this series as a model of what needs to be done - having them all in one patch just seems like it'd be easier. ACK in principle for the code though John

Now that we have APIs for relabel memdevs on hotplug, fill in the missing implementation in qemu hotplug code. The qemuSecurity wrappers might look like overkill for now, because qemu namespace code does not deal with the nvdimms yet. Nor does our cgroup code. But hey, there's cgroup_device_acl variable in qemu.conf. If users add their /dev/pmem* device in there, the device is allowed in cgroups and created in the namespace so they can successfully passthrough it to the domain. It doesn't look like overkill after all, does it? Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/qemu/qemu_hotplug.c | 13 +++++++++++ src/qemu/qemu_security.c | 56 ++++++++++++++++++++++++++++++++++++++++++++++++ src/qemu/qemu_security.h | 8 +++++++ 3 files changed, 77 insertions(+) diff --git a/src/qemu/qemu_hotplug.c b/src/qemu/qemu_hotplug.c index f2299f66e..7e837a422 100644 --- a/src/qemu/qemu_hotplug.c +++ b/src/qemu/qemu_hotplug.c @@ -2191,6 +2191,7 @@ qemuDomainAttachMemory(virQEMUDriverPtr driver, char *objalias = NULL; const char *backendType; bool objAdded = false; + bool teardownlabel = false; virJSONValuePtr props = NULL; virObjectEventPtr event; int id; @@ -2232,6 +2233,10 @@ qemuDomainAttachMemory(virQEMUDriverPtr driver, goto removedef; } + if (qemuSecuritySetMemoryLabel(driver, vm, mem) < 0) + goto removedef; + teardownlabel = true; + qemuDomainObjEnterMonitor(driver, vm); rv = qemuMonitorAddObject(priv->mon, backendType, objalias, props); props = NULL; /* qemuMonitorAddObject consumes */ @@ -2266,6 +2271,11 @@ qemuDomainAttachMemory(virQEMUDriverPtr driver, audit: virDomainAuditMemory(vm, oldmem, newmem, "update", ret == 0); cleanup: + if (mem && ret < 0) { + if (teardownlabel && qemuSecurityRestoreMemoryLabel(driver, vm, mem) < 0) + VIR_WARN("Unable to restore security label on memdev"); + } + virObjectUnref(cfg); VIR_FREE(devstr); VIR_FREE(objalias); @@ -3726,6 +3736,9 @@ qemuDomainRemoveMemoryDevice(virQEMUDriverPtr driver, if ((idx = virDomainMemoryFindByDef(vm->def, mem)) >= 0) virDomainMemoryRemove(vm->def, idx); + if (qemuSecurityRestoreMemoryLabel(driver, vm, mem) < 0) + VIR_WARN("Unable to restore security label on memdev"); + virDomainMemoryDefFree(mem); /* fix the balloon size */ diff --git a/src/qemu/qemu_security.c b/src/qemu/qemu_security.c index f2931976b..61934f990 100644 --- a/src/qemu/qemu_security.c +++ b/src/qemu/qemu_security.c @@ -245,3 +245,59 @@ qemuSecurityRestoreHostdevLabel(virQEMUDriverPtr driver, virSecurityManagerTransactionAbort(driver->securityManager); return ret; } + + +int +qemuSecuritySetMemoryLabel(virQEMUDriverPtr driver, + virDomainObjPtr vm, + virDomainMemoryDefPtr mem) +{ + int ret = -1; + + if (qemuDomainNamespaceEnabled(vm, QEMU_DOMAIN_NS_MOUNT) && + virSecurityManagerTransactionStart(driver->securityManager) < 0) + goto cleanup; + + if (virSecurityManagerSetMemoryLabel(driver->securityManager, + vm->def, + mem) < 0) + goto cleanup; + + if (qemuDomainNamespaceEnabled(vm, QEMU_DOMAIN_NS_MOUNT) && + virSecurityManagerTransactionCommit(driver->securityManager, + vm->pid) < 0) + goto cleanup; + + ret = 0; + cleanup: + virSecurityManagerTransactionAbort(driver->securityManager); + return ret; +} + + +int +qemuSecurityRestoreMemoryLabel(virQEMUDriverPtr driver, + virDomainObjPtr vm, + virDomainMemoryDefPtr mem) +{ + int ret = -1; + + if (qemuDomainNamespaceEnabled(vm, QEMU_DOMAIN_NS_MOUNT) && + virSecurityManagerTransactionStart(driver->securityManager) < 0) + goto cleanup; + + if (virSecurityManagerRestoreMemoryLabel(driver->securityManager, + vm->def, + mem) < 0) + goto cleanup; + + if (qemuDomainNamespaceEnabled(vm, QEMU_DOMAIN_NS_MOUNT) && + virSecurityManagerTransactionCommit(driver->securityManager, + vm->pid) < 0) + goto cleanup; + + ret = 0; + cleanup: + virSecurityManagerTransactionAbort(driver->securityManager); + return ret; +} diff --git a/src/qemu/qemu_security.h b/src/qemu/qemu_security.h index 54638908d..04a0b82d4 100644 --- a/src/qemu/qemu_security.h +++ b/src/qemu/qemu_security.h @@ -60,4 +60,12 @@ int qemuSecuritySetHostdevLabel(virQEMUDriverPtr driver, int qemuSecurityRestoreHostdevLabel(virQEMUDriverPtr driver, virDomainObjPtr vm, virDomainHostdevDefPtr hostdev); + +int qemuSecuritySetMemoryLabel(virQEMUDriverPtr driver, + virDomainObjPtr vm, + virDomainMemoryDefPtr mem); + +int qemuSecurityRestoreMemoryLabel(virQEMUDriverPtr driver, + virDomainObjPtr vm, + virDomainMemoryDefPtr mem); #endif /* __QEMU_SECURITY_H__ */ -- 2.11.0

On 02/27/2017 08:19 AM, Michal Privoznik wrote:
Now that we have APIs for relabel memdevs on hotplug, fill in the missing implementation in qemu hotplug code.
The qemuSecurity wrappers might look like overkill for now, because qemu namespace code does not deal with the nvdimms yet. Nor does our cgroup code. But hey, there's cgroup_device_acl variable in qemu.conf. If users add their /dev/pmem* device in there, the device is allowed in cgroups and created in the namespace so they can successfully passthrough it to the domain. It doesn't look like overkill after all, does it?
Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/qemu/qemu_hotplug.c | 13 +++++++++++ src/qemu/qemu_security.c | 56 ++++++++++++++++++++++++++++++++++++++++++++++++ src/qemu/qemu_security.h | 8 +++++++ 3 files changed, 77 insertions(+)
There's a git am -3 conflict here: Applying: qemu_hotplug: Relabel memdev error: patch failed: src/qemu/qemu_security.h:60 error: src/qemu/qemu_security.h: patch does not apply Patch failed at 0012 qemu_hotplug: Relabel memdev
diff --git a/src/qemu/qemu_hotplug.c b/src/qemu/qemu_hotplug.c index f2299f66e..7e837a422 100644 --- a/src/qemu/qemu_hotplug.c +++ b/src/qemu/qemu_hotplug.c @@ -2191,6 +2191,7 @@ qemuDomainAttachMemory(virQEMUDriverPtr driver, char *objalias = NULL; const char *backendType; bool objAdded = false; + bool teardownlabel = false; virJSONValuePtr props = NULL; virObjectEventPtr event; int id; @@ -2232,6 +2233,10 @@ qemuDomainAttachMemory(virQEMUDriverPtr driver, goto removedef; }
+ if (qemuSecuritySetMemoryLabel(driver, vm, mem) < 0) + goto removedef; + teardownlabel = true; +
I almost wonder if this should go before the virDomainMemoryInsert which results in callers jumping to removedef. Secondary should the failure and teardown for this and the subsequent objects have a separate label which can save/restore the error that occurred so that I don't have to go check the called function to see if any of the functions it calls (and so on) would perhaps overwrite the error that caused the initial failure. The other "oddity" would be that by going to removedef, one jumps to audit rather than code before the AdjustMaxMemLock which would go to cleanup and avoid the audit. I dunno - I guess I'd expect the audit to occur, but we don't seem to be consistent within hotplug overall. John
qemuDomainObjEnterMonitor(driver, vm); rv = qemuMonitorAddObject(priv->mon, backendType, objalias, props); props = NULL; /* qemuMonitorAddObject consumes */ @@ -2266,6 +2271,11 @@ qemuDomainAttachMemory(virQEMUDriverPtr driver, audit: virDomainAuditMemory(vm, oldmem, newmem, "update", ret == 0); cleanup: + if (mem && ret < 0) { + if (teardownlabel && qemuSecurityRestoreMemoryLabel(driver, vm, mem) < 0) + VIR_WARN("Unable to restore security label on memdev"); + } + virObjectUnref(cfg); VIR_FREE(devstr); VIR_FREE(objalias); @@ -3726,6 +3736,9 @@ qemuDomainRemoveMemoryDevice(virQEMUDriverPtr driver, if ((idx = virDomainMemoryFindByDef(vm->def, mem)) >= 0) virDomainMemoryRemove(vm->def, idx);
+ if (qemuSecurityRestoreMemoryLabel(driver, vm, mem) < 0) + VIR_WARN("Unable to restore security label on memdev"); + virDomainMemoryDefFree(mem);
/* fix the balloon size */ diff --git a/src/qemu/qemu_security.c b/src/qemu/qemu_security.c index f2931976b..61934f990 100644 --- a/src/qemu/qemu_security.c +++ b/src/qemu/qemu_security.c @@ -245,3 +245,59 @@ qemuSecurityRestoreHostdevLabel(virQEMUDriverPtr driver, virSecurityManagerTransactionAbort(driver->securityManager); return ret; } + + +int +qemuSecuritySetMemoryLabel(virQEMUDriverPtr driver, + virDomainObjPtr vm, + virDomainMemoryDefPtr mem) +{ + int ret = -1; + + if (qemuDomainNamespaceEnabled(vm, QEMU_DOMAIN_NS_MOUNT) && + virSecurityManagerTransactionStart(driver->securityManager) < 0) + goto cleanup; + + if (virSecurityManagerSetMemoryLabel(driver->securityManager, + vm->def, + mem) < 0) + goto cleanup; + + if (qemuDomainNamespaceEnabled(vm, QEMU_DOMAIN_NS_MOUNT) && + virSecurityManagerTransactionCommit(driver->securityManager, + vm->pid) < 0) + goto cleanup; + + ret = 0; + cleanup: + virSecurityManagerTransactionAbort(driver->securityManager); + return ret; +} + + +int +qemuSecurityRestoreMemoryLabel(virQEMUDriverPtr driver, + virDomainObjPtr vm, + virDomainMemoryDefPtr mem) +{ + int ret = -1; + + if (qemuDomainNamespaceEnabled(vm, QEMU_DOMAIN_NS_MOUNT) && + virSecurityManagerTransactionStart(driver->securityManager) < 0) + goto cleanup; + + if (virSecurityManagerRestoreMemoryLabel(driver->securityManager, + vm->def, + mem) < 0) + goto cleanup; + + if (qemuDomainNamespaceEnabled(vm, QEMU_DOMAIN_NS_MOUNT) && + virSecurityManagerTransactionCommit(driver->securityManager, + vm->pid) < 0) + goto cleanup; + + ret = 0; + cleanup: + virSecurityManagerTransactionAbort(driver->securityManager); + return ret; +} diff --git a/src/qemu/qemu_security.h b/src/qemu/qemu_security.h index 54638908d..04a0b82d4 100644 --- a/src/qemu/qemu_security.h +++ b/src/qemu/qemu_security.h @@ -60,4 +60,12 @@ int qemuSecuritySetHostdevLabel(virQEMUDriverPtr driver, int qemuSecurityRestoreHostdevLabel(virQEMUDriverPtr driver, virDomainObjPtr vm, virDomainHostdevDefPtr hostdev); + +int qemuSecuritySetMemoryLabel(virQEMUDriverPtr driver, + virDomainObjPtr vm, + virDomainMemoryDefPtr mem); + +int qemuSecurityRestoreMemoryLabel(virQEMUDriverPtr driver, + virDomainObjPtr vm, + virDomainMemoryDefPtr mem); #endif /* __QEMU_SECURITY_H__ */

Some users might want to pass a blockdev or a chardev as a backend for NVDIMM. In fact, this is expected to be the mostly used configuration. Therefore libvirt should allow the device in devices CGroup then. Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/qemu/qemu_cgroup.c | 49 +++++++++++++++++++++++++++++++++++++++++++++++++ src/qemu/qemu_cgroup.h | 4 ++++ src/qemu/qemu_hotplug.c | 10 ++++++++++ 3 files changed, 63 insertions(+) diff --git a/src/qemu/qemu_cgroup.c b/src/qemu/qemu_cgroup.c index 42a47a798..8f68a22dc 100644 --- a/src/qemu/qemu_cgroup.c +++ b/src/qemu/qemu_cgroup.c @@ -348,6 +348,50 @@ qemuTeardownHostdevCgroup(virDomainObjPtr vm, } +int +qemuSetupMemoryDevicesCgroup(virDomainObjPtr vm, + virDomainMemoryDefPtr mem) +{ + qemuDomainObjPrivatePtr priv = vm->privateData; + int rv; + + if (!virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_DEVICES)) + return 0; + + if (mem->model != VIR_DOMAIN_MEMORY_MODEL_NVDIMM) + return 0; + + VIR_DEBUG("Setting devices Cgroup for NVDIMM device: %s", mem->path); + rv = virCgroupAllowDevicePath(priv->cgroup, mem->path, + VIR_CGROUP_DEVICE_RW, false); + virDomainAuditCgroupPath(vm, priv->cgroup, "allow", + mem->path, "rw", rv == 0); + + return rv; +} + + +int +qemuTeardownMemoryDevicesCgroup(virDomainObjPtr vm, + virDomainMemoryDefPtr mem) +{ + qemuDomainObjPrivatePtr priv = vm->privateData; + int rv; + + if (!virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_DEVICES)) + return 0; + + if (mem->model != VIR_DOMAIN_MEMORY_MODEL_NVDIMM) + return 0; + + rv = virCgroupDenyDevicePath(priv->cgroup, mem->path, + VIR_CGROUP_DEVICE_RWM, false); + virDomainAuditCgroupPath(vm, priv->cgroup, + "deny", mem->path, "rwm", rv == 0); + return rv; +} + + static int qemuSetupGraphicsCgroup(virDomainObjPtr vm, virDomainGraphicsDefPtr gfx) @@ -647,6 +691,11 @@ qemuSetupDevicesCgroup(virQEMUDriverPtr driver, goto cleanup; } + for (i = 0; i < vm->def->nmems; i++) { + if (qemuSetupMemoryDevicesCgroup(vm, vm->def->mems[i]) < 0) + goto cleanup; + } + for (i = 0; i < vm->def->ngraphics; i++) { if (qemuSetupGraphicsCgroup(vm, vm->def->graphics[i]) < 0) goto cleanup; diff --git a/src/qemu/qemu_cgroup.h b/src/qemu/qemu_cgroup.h index 8ae4a72ab..d016ce29d 100644 --- a/src/qemu/qemu_cgroup.h +++ b/src/qemu/qemu_cgroup.h @@ -43,6 +43,10 @@ int qemuSetupHostdevCgroup(virDomainObjPtr vm, int qemuTeardownHostdevCgroup(virDomainObjPtr vm, virDomainHostdevDefPtr dev) ATTRIBUTE_RETURN_CHECK; +int qemuSetupMemoryDevicesCgroup(virDomainObjPtr vm, + virDomainMemoryDefPtr mem); +int qemuTeardownMemoryDevicesCgroup(virDomainObjPtr vm, + virDomainMemoryDefPtr mem); int qemuSetupRNGCgroup(virDomainObjPtr vm, virDomainRNGDefPtr rng); int qemuTeardownRNGCgroup(virDomainObjPtr vm, diff --git a/src/qemu/qemu_hotplug.c b/src/qemu/qemu_hotplug.c index 7e837a422..e821596bf 100644 --- a/src/qemu/qemu_hotplug.c +++ b/src/qemu/qemu_hotplug.c @@ -2192,6 +2192,7 @@ qemuDomainAttachMemory(virQEMUDriverPtr driver, const char *backendType; bool objAdded = false; bool teardownlabel = false; + bool teardowncgroup = false; virJSONValuePtr props = NULL; virObjectEventPtr event; int id; @@ -2233,6 +2234,10 @@ qemuDomainAttachMemory(virQEMUDriverPtr driver, goto removedef; } + if (qemuSetupMemoryDevicesCgroup(vm, mem) < 0) + goto removedef; + teardowncgroup = true; + if (qemuSecuritySetMemoryLabel(driver, vm, mem) < 0) goto removedef; teardownlabel = true; @@ -2272,6 +2277,8 @@ qemuDomainAttachMemory(virQEMUDriverPtr driver, virDomainAuditMemory(vm, oldmem, newmem, "update", ret == 0); cleanup: if (mem && ret < 0) { + if (teardowncgroup && qemuTeardownMemoryDevicesCgroup(vm, mem) < 0) + VIR_WARN("Unable to remove memory device cgroup ACL on hotplug fail"); if (teardownlabel && qemuSecurityRestoreMemoryLabel(driver, vm, mem) < 0) VIR_WARN("Unable to restore security label on memdev"); } @@ -3739,6 +3746,9 @@ qemuDomainRemoveMemoryDevice(virQEMUDriverPtr driver, if (qemuSecurityRestoreMemoryLabel(driver, vm, mem) < 0) VIR_WARN("Unable to restore security label on memdev"); + if (qemuTeardownMemoryDevicesCgroup(vm, mem) < 0) + VIR_WARN("Unable to remove memory device cgroup ACL"); + virDomainMemoryDefFree(mem); /* fix the balloon size */ -- 2.11.0

On 02/27/2017 08:19 AM, Michal Privoznik wrote:
Some users might want to pass a blockdev or a chardev as a backend for NVDIMM. In fact, this is expected to be the mostly used configuration. Therefore libvirt should allow the device in devices CGroup then.
Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/qemu/qemu_cgroup.c | 49 +++++++++++++++++++++++++++++++++++++++++++++++++ src/qemu/qemu_cgroup.h | 4 ++++ src/qemu/qemu_hotplug.c | 10 ++++++++++ 3 files changed, 63 insertions(+)
diff --git a/src/qemu/qemu_cgroup.c b/src/qemu/qemu_cgroup.c index 42a47a798..8f68a22dc 100644 --- a/src/qemu/qemu_cgroup.c +++ b/src/qemu/qemu_cgroup.c @@ -348,6 +348,50 @@ qemuTeardownHostdevCgroup(virDomainObjPtr vm, }
+int +qemuSetupMemoryDevicesCgroup(virDomainObjPtr vm, + virDomainMemoryDefPtr mem) +{ + qemuDomainObjPrivatePtr priv = vm->privateData; + int rv; + + if (!virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_DEVICES)) + return 0; + + if (mem->model != VIR_DOMAIN_MEMORY_MODEL_NVDIMM) + return 0;
This would seem to be a quicker check than HasController... and should go first shouldn't it?
+ + VIR_DEBUG("Setting devices Cgroup for NVDIMM device: %s", mem->path); + rv = virCgroupAllowDevicePath(priv->cgroup, mem->path, + VIR_CGROUP_DEVICE_RW, false); + virDomainAuditCgroupPath(vm, priv->cgroup, "allow", + mem->path, "rw", rv == 0); + + return rv; +} + + +int +qemuTeardownMemoryDevicesCgroup(virDomainObjPtr vm, + virDomainMemoryDefPtr mem) +{ + qemuDomainObjPrivatePtr priv = vm->privateData; + int rv; + + if (!virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_DEVICES)) + return 0; + + if (mem->model != VIR_DOMAIN_MEMORY_MODEL_NVDIMM) + return 0;
Same Otherwise seems reasonable (although I cannot git am it) John
+ + rv = virCgroupDenyDevicePath(priv->cgroup, mem->path, + VIR_CGROUP_DEVICE_RWM, false); + virDomainAuditCgroupPath(vm, priv->cgroup, + "deny", mem->path, "rwm", rv == 0); + return rv; +} + + static int qemuSetupGraphicsCgroup(virDomainObjPtr vm, virDomainGraphicsDefPtr gfx) @@ -647,6 +691,11 @@ qemuSetupDevicesCgroup(virQEMUDriverPtr driver, goto cleanup; }
+ for (i = 0; i < vm->def->nmems; i++) { + if (qemuSetupMemoryDevicesCgroup(vm, vm->def->mems[i]) < 0) + goto cleanup; + } + for (i = 0; i < vm->def->ngraphics; i++) { if (qemuSetupGraphicsCgroup(vm, vm->def->graphics[i]) < 0) goto cleanup; diff --git a/src/qemu/qemu_cgroup.h b/src/qemu/qemu_cgroup.h index 8ae4a72ab..d016ce29d 100644 --- a/src/qemu/qemu_cgroup.h +++ b/src/qemu/qemu_cgroup.h @@ -43,6 +43,10 @@ int qemuSetupHostdevCgroup(virDomainObjPtr vm, int qemuTeardownHostdevCgroup(virDomainObjPtr vm, virDomainHostdevDefPtr dev) ATTRIBUTE_RETURN_CHECK; +int qemuSetupMemoryDevicesCgroup(virDomainObjPtr vm, + virDomainMemoryDefPtr mem); +int qemuTeardownMemoryDevicesCgroup(virDomainObjPtr vm, + virDomainMemoryDefPtr mem); int qemuSetupRNGCgroup(virDomainObjPtr vm, virDomainRNGDefPtr rng); int qemuTeardownRNGCgroup(virDomainObjPtr vm, diff --git a/src/qemu/qemu_hotplug.c b/src/qemu/qemu_hotplug.c index 7e837a422..e821596bf 100644 --- a/src/qemu/qemu_hotplug.c +++ b/src/qemu/qemu_hotplug.c @@ -2192,6 +2192,7 @@ qemuDomainAttachMemory(virQEMUDriverPtr driver, const char *backendType; bool objAdded = false; bool teardownlabel = false; + bool teardowncgroup = false; virJSONValuePtr props = NULL; virObjectEventPtr event; int id; @@ -2233,6 +2234,10 @@ qemuDomainAttachMemory(virQEMUDriverPtr driver, goto removedef; }
+ if (qemuSetupMemoryDevicesCgroup(vm, mem) < 0) + goto removedef; + teardowncgroup = true; + if (qemuSecuritySetMemoryLabel(driver, vm, mem) < 0) goto removedef; teardownlabel = true; @@ -2272,6 +2277,8 @@ qemuDomainAttachMemory(virQEMUDriverPtr driver, virDomainAuditMemory(vm, oldmem, newmem, "update", ret == 0); cleanup: if (mem && ret < 0) { + if (teardowncgroup && qemuTeardownMemoryDevicesCgroup(vm, mem) < 0) + VIR_WARN("Unable to remove memory device cgroup ACL on hotplug fail"); if (teardownlabel && qemuSecurityRestoreMemoryLabel(driver, vm, mem) < 0) VIR_WARN("Unable to restore security label on memdev"); } @@ -3739,6 +3746,9 @@ qemuDomainRemoveMemoryDevice(virQEMUDriverPtr driver, if (qemuSecurityRestoreMemoryLabel(driver, vm, mem) < 0) VIR_WARN("Unable to restore security label on memdev");
+ if (qemuTeardownMemoryDevicesCgroup(vm, mem) < 0) + VIR_WARN("Unable to remove memory device cgroup ACL"); + virDomainMemoryDefFree(mem);
/* fix the balloon size */

Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/qemu/qemu_domain.c | 76 +++++++++++++++++++++++++++++++++++++++++++++++++ src/qemu/qemu_domain.h | 8 ++++++ src/qemu/qemu_hotplug.c | 11 +++++++ 3 files changed, 95 insertions(+) diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c index f585e6f25..1f6627005 100644 --- a/src/qemu/qemu_domain.c +++ b/src/qemu/qemu_domain.c @@ -7478,6 +7478,37 @@ qemuDomainSetupAllHostdevs(virQEMUDriverConfigPtr cfg, } +static int +qemuDomainSetupMemory(virQEMUDriverConfigPtr cfg ATTRIBUTE_UNUSED, + virDomainMemoryDefPtr mem, + const char *devPath) +{ + if (mem->model != VIR_DOMAIN_MEMORY_MODEL_NVDIMM) + return 0; + + return qemuDomainCreateDevice(mem->path, devPath, false); +} + + +static int +qemuDomainSetupAllMemories(virQEMUDriverConfigPtr cfg, + virDomainObjPtr vm, + const char *devPath) +{ + size_t i; + + VIR_DEBUG("Setting up memories"); + for (i = 0; i < vm->def->nmems; i++) { + if (qemuDomainSetupMemory(cfg, + vm->def->mems[i], + devPath) < 0) + return -1; + } + VIR_DEBUG("Setup all memories"); + return 0; +} + + static int qemuDomainSetupChardev(virDomainDefPtr def ATTRIBUTE_UNUSED, virDomainChrDefPtr dev, @@ -7722,6 +7753,9 @@ qemuDomainBuildNamespace(virQEMUDriverConfigPtr cfg, if (qemuDomainSetupAllHostdevs(cfg, vm, devPath) < 0) goto cleanup; + if (qemuDomainSetupAllMemories(cfg, vm, devPath) < 0) + goto cleanup; + if (qemuDomainSetupAllChardevs(cfg, vm, devPath) < 0) goto cleanup; @@ -8180,6 +8214,48 @@ qemuDomainNamespaceTeardownHostdev(virQEMUDriverPtr driver, } +int +qemuDomainNamespaceSetupMemory(virQEMUDriverPtr driver, + virDomainObjPtr vm, + virDomainMemoryDefPtr mem) +{ + int ret = -1; + + if (!qemuDomainNamespaceEnabled(vm, QEMU_DOMAIN_NS_MOUNT)) + return 0; + + if (mem->model != VIR_DOMAIN_MEMORY_MODEL_NVDIMM) + return 0; + + if (qemuDomainAttachDeviceMknod(driver, vm, mem->path) < 0) + goto cleanup; + ret = 0; + cleanup: + return ret; +} + + +int +qemuDomainNamespaceTeardownMemory(virQEMUDriverPtr driver, + virDomainObjPtr vm, + virDomainMemoryDefPtr mem) +{ + int ret = -1; + + if (!qemuDomainNamespaceEnabled(vm, QEMU_DOMAIN_NS_MOUNT)) + return 0; + + if (mem->model != VIR_DOMAIN_MEMORY_MODEL_NVDIMM) + return 0; + + if (qemuDomainDetachDeviceUnlink(driver, vm, mem->path) < 0) + goto cleanup; + ret = 0; + cleanup: + return ret; +} + + int qemuDomainNamespaceSetupChardev(virQEMUDriverPtr driver, virDomainObjPtr vm, diff --git a/src/qemu/qemu_domain.h b/src/qemu/qemu_domain.h index 72efa3336..2c4ba487a 100644 --- a/src/qemu/qemu_domain.h +++ b/src/qemu/qemu_domain.h @@ -834,6 +834,14 @@ int qemuDomainNamespaceTeardownHostdev(virQEMUDriverPtr driver, virDomainObjPtr vm, virDomainHostdevDefPtr hostdev); +int qemuDomainNamespaceSetupMemory(virQEMUDriverPtr driver, + virDomainObjPtr vm, + virDomainMemoryDefPtr memory); + +int qemuDomainNamespaceTeardownMemory(virQEMUDriverPtr driver, + virDomainObjPtr vm, + virDomainMemoryDefPtr memory); + int qemuDomainNamespaceSetupChardev(virQEMUDriverPtr driver, virDomainObjPtr vm, virDomainChrDefPtr chr); diff --git a/src/qemu/qemu_hotplug.c b/src/qemu/qemu_hotplug.c index e821596bf..1f85b154d 100644 --- a/src/qemu/qemu_hotplug.c +++ b/src/qemu/qemu_hotplug.c @@ -2193,6 +2193,7 @@ qemuDomainAttachMemory(virQEMUDriverPtr driver, bool objAdded = false; bool teardownlabel = false; bool teardowncgroup = false; + bool teardowndevice = false; virJSONValuePtr props = NULL; virObjectEventPtr event; int id; @@ -2234,6 +2235,10 @@ qemuDomainAttachMemory(virQEMUDriverPtr driver, goto removedef; } + if (qemuDomainNamespaceSetupMemory(driver, vm, mem) < 0) + goto removedef; + teardowndevice = true; + if (qemuSetupMemoryDevicesCgroup(vm, mem) < 0) goto removedef; teardowncgroup = true; @@ -2281,6 +2286,9 @@ qemuDomainAttachMemory(virQEMUDriverPtr driver, VIR_WARN("Unable to remove memory device cgroup ACL on hotplug fail"); if (teardownlabel && qemuSecurityRestoreMemoryLabel(driver, vm, mem) < 0) VIR_WARN("Unable to restore security label on memdev"); + if (teardowndevice && + qemuDomainNamespaceTeardownMemory(driver, vm, mem) < 0) + VIR_WARN("Unable to remove memory device from /dev"); } virObjectUnref(cfg); @@ -3749,6 +3757,9 @@ qemuDomainRemoveMemoryDevice(virQEMUDriverPtr driver, if (qemuTeardownMemoryDevicesCgroup(vm, mem) < 0) VIR_WARN("Unable to remove memory device cgroup ACL"); + if (qemuDomainNamespaceTeardownMemory(driver, vm, mem) < 0) + VIR_WARN("Unable to remove memory device from /dev"); + virDomainMemoryDefFree(mem); /* fix the balloon size */ -- 2.11.0

On 02/27/2017 08:19 AM, Michal Privoznik wrote:
Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/qemu/qemu_domain.c | 76 +++++++++++++++++++++++++++++++++++++++++++++++++ src/qemu/qemu_domain.h | 8 ++++++ src/qemu/qemu_hotplug.c | 11 +++++++ 3 files changed, 95 insertions(+)
diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c index f585e6f25..1f6627005 100644 --- a/src/qemu/qemu_domain.c +++ b/src/qemu/qemu_domain.c @@ -7478,6 +7478,37 @@ qemuDomainSetupAllHostdevs(virQEMUDriverConfigPtr cfg, }
+static int +qemuDomainSetupMemory(virQEMUDriverConfigPtr cfg ATTRIBUTE_UNUSED, + virDomainMemoryDefPtr mem, + const char *devPath) +{ + if (mem->model != VIR_DOMAIN_MEMORY_MODEL_NVDIMM) + return 0; + + return qemuDomainCreateDevice(mem->path, devPath, false); +} + + +static int +qemuDomainSetupAllMemories(virQEMUDriverConfigPtr cfg, + virDomainObjPtr vm, + const char *devPath) +{ + size_t i; + + VIR_DEBUG("Setting up memories"); + for (i = 0; i < vm->def->nmems; i++) { + if (qemuDomainSetupMemory(cfg, + vm->def->mems[i], + devPath) < 0) + return -1; + } + VIR_DEBUG("Setup all memories"); + return 0; +} + + static int qemuDomainSetupChardev(virDomainDefPtr def ATTRIBUTE_UNUSED, virDomainChrDefPtr dev, @@ -7722,6 +7753,9 @@ qemuDomainBuildNamespace(virQEMUDriverConfigPtr cfg, if (qemuDomainSetupAllHostdevs(cfg, vm, devPath) < 0) goto cleanup;
+ if (qemuDomainSetupAllMemories(cfg, vm, devPath) < 0) + goto cleanup; + if (qemuDomainSetupAllChardevs(cfg, vm, devPath) < 0) goto cleanup;
@@ -8180,6 +8214,48 @@ qemuDomainNamespaceTeardownHostdev(virQEMUDriverPtr driver, }
+int +qemuDomainNamespaceSetupMemory(virQEMUDriverPtr driver, + virDomainObjPtr vm, + virDomainMemoryDefPtr mem) +{ + int ret = -1; + + if (!qemuDomainNamespaceEnabled(vm, QEMU_DOMAIN_NS_MOUNT)) + return 0; + + if (mem->model != VIR_DOMAIN_MEMORY_MODEL_NVDIMM) + return 0;
Similar to my cgroup query - I would think this check should go first...
+ + if (qemuDomainAttachDeviceMknod(driver, vm, mem->path) < 0) + goto cleanup; + ret = 0; + cleanup: + return ret; +} + + +int +qemuDomainNamespaceTeardownMemory(virQEMUDriverPtr driver, + virDomainObjPtr vm, + virDomainMemoryDefPtr mem) +{ + int ret = -1; + + if (!qemuDomainNamespaceEnabled(vm, QEMU_DOMAIN_NS_MOUNT)) + return 0; + + if (mem->model != VIR_DOMAIN_MEMORY_MODEL_NVDIMM) + return 0;
Same Otherwise, seems reasonable even though I cannot git am it. John
+ + if (qemuDomainDetachDeviceUnlink(driver, vm, mem->path) < 0) + goto cleanup; + ret = 0; + cleanup: + return ret; +} + + int qemuDomainNamespaceSetupChardev(virQEMUDriverPtr driver, virDomainObjPtr vm, diff --git a/src/qemu/qemu_domain.h b/src/qemu/qemu_domain.h index 72efa3336..2c4ba487a 100644 --- a/src/qemu/qemu_domain.h +++ b/src/qemu/qemu_domain.h @@ -834,6 +834,14 @@ int qemuDomainNamespaceTeardownHostdev(virQEMUDriverPtr driver, virDomainObjPtr vm, virDomainHostdevDefPtr hostdev);
+int qemuDomainNamespaceSetupMemory(virQEMUDriverPtr driver, + virDomainObjPtr vm, + virDomainMemoryDefPtr memory); + +int qemuDomainNamespaceTeardownMemory(virQEMUDriverPtr driver, + virDomainObjPtr vm, + virDomainMemoryDefPtr memory); + int qemuDomainNamespaceSetupChardev(virQEMUDriverPtr driver, virDomainObjPtr vm, virDomainChrDefPtr chr); diff --git a/src/qemu/qemu_hotplug.c b/src/qemu/qemu_hotplug.c index e821596bf..1f85b154d 100644 --- a/src/qemu/qemu_hotplug.c +++ b/src/qemu/qemu_hotplug.c @@ -2193,6 +2193,7 @@ qemuDomainAttachMemory(virQEMUDriverPtr driver, bool objAdded = false; bool teardownlabel = false; bool teardowncgroup = false; + bool teardowndevice = false; virJSONValuePtr props = NULL; virObjectEventPtr event; int id; @@ -2234,6 +2235,10 @@ qemuDomainAttachMemory(virQEMUDriverPtr driver, goto removedef; }
+ if (qemuDomainNamespaceSetupMemory(driver, vm, mem) < 0) + goto removedef; + teardowndevice = true; + if (qemuSetupMemoryDevicesCgroup(vm, mem) < 0) goto removedef; teardowncgroup = true; @@ -2281,6 +2286,9 @@ qemuDomainAttachMemory(virQEMUDriverPtr driver, VIR_WARN("Unable to remove memory device cgroup ACL on hotplug fail"); if (teardownlabel && qemuSecurityRestoreMemoryLabel(driver, vm, mem) < 0) VIR_WARN("Unable to restore security label on memdev"); + if (teardowndevice && + qemuDomainNamespaceTeardownMemory(driver, vm, mem) < 0) + VIR_WARN("Unable to remove memory device from /dev"); }
virObjectUnref(cfg); @@ -3749,6 +3757,9 @@ qemuDomainRemoveMemoryDevice(virQEMUDriverPtr driver, if (qemuTeardownMemoryDevicesCgroup(vm, mem) < 0) VIR_WARN("Unable to remove memory device cgroup ACL");
+ if (qemuDomainNamespaceTeardownMemory(driver, vm, mem) < 0) + VIR_WARN("Unable to remove memory device from /dev"); + virDomainMemoryDefFree(mem);
/* fix the balloon size */

On 02/27/2017 02:19 PM, Michal Privoznik wrote:
v2 of:
https://www.redhat.com/archives/libvir-list/2017-February/msg01229.html
Gentle ping. Michal
participants (2)
-
John Ferlan
-
Michal Privoznik