[libvirt] [PATCH v2] qemu: Rework setting process affinity
by Michal Privoznik
https://bugzilla.redhat.com/show_bug.cgi?id=1503284
The way we currently start qemu from CPU affinity POV is as
follows:
1) the child process is set affinity to all online CPUs (unless
some vcpu pinning was given in the domain XML)
2) Once qemu is running, cpuset cgroup is configured taking
memory pinning into account
The problem is that we let qemu allocate its memory just anywhere in
1) and then rely on 2) to be able to move the memory to the
configured NUMA nodes. This might not always be possible (e.g.
qemu might lock some parts of its memory) and is very suboptimal
(copying large amounts of memory between NUMA nodes takes a
significant amount of time).
The solution is to set affinity to one of (in priority order):
- The CPUs associated with NUMA memory affinity mask
- The CPUs associated with emulator pinning
- All online host CPUs
Later (once QEMU has allocated its memory) we then change this
again to (again in priority order):
- The CPUs associated with emulator pinning
- The CPUs returned by numad
- The CPUs associated with vCPU pinning
- All online host CPUs
Signed-off-by: Michal Privoznik <mprivozn(a)redhat.com>
---
diff to v1 (both points suggested by Dan):
- Expanded the commit message
- fixed qemuProcessGetAllCpuAffinity so that it returns online CPU map
only
src/qemu/qemu_process.c | 132 +++++++++++++++++++---------------------
1 file changed, 63 insertions(+), 69 deletions(-)
diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c
index 7592c98b74..dace5aaca1 100644
--- a/src/qemu/qemu_process.c
+++ b/src/qemu/qemu_process.c
@@ -2435,6 +2435,21 @@ qemuProcessDetectIOThreadPIDs(virQEMUDriverPtr driver,
}
+static int
+qemuProcessGetAllCpuAffinity(virBitmapPtr *cpumapRet)
+{
+ *cpumapRet = NULL;
+
+ if (!virHostCPUHasBitmap())
+ return 0;
+
+ if (!(*cpumapRet = virHostCPUGetOnlineBitmap()))
+ return -1;
+
+ return 0;
+}
+
+
/*
* To be run between fork/exec of QEMU only
*/
@@ -2443,9 +2458,9 @@ static int
qemuProcessInitCpuAffinity(virDomainObjPtr vm)
{
int ret = -1;
- virBitmapPtr cpumap = NULL;
virBitmapPtr cpumapToSet = NULL;
- virBitmapPtr hostcpumap = NULL;
+ VIR_AUTOPTR(virBitmap) hostcpumap = NULL;
+ virDomainNumatuneMemMode mem_mode;
qemuDomainObjPrivatePtr priv = vm->privateData;
if (!vm->pid) {
@@ -2454,59 +2469,39 @@ qemuProcessInitCpuAffinity(virDomainObjPtr vm)
return -1;
}
- if (vm->def->placement_mode == VIR_DOMAIN_CPU_PLACEMENT_MODE_AUTO) {
- VIR_DEBUG("Set CPU affinity with advisory nodeset from numad");
- cpumapToSet = priv->autoCpuset;
+ /* Here is the deal, we can't set cpuset.mems before qemu is
+ * started as it clashes with KVM allocation. Therefore, we
+ * used to let qemu allocate its memory anywhere as we would
+ * then move the memory to desired NUMA node via CGroups.
+ * However, that might not always be possible because qemu
+ * might lock some parts of its memory (e.g. due to VFIO).
+ * Even if it is possible, memory has to be copied between NUMA
+ * nodes, which is suboptimal.
+ * The solution is to set an affinity that matches as closely as
+ * possible what we would have set in CGroups, and then fix it
+ * up later, once qemu is already running. */
+ if (virDomainNumaGetNodeCount(vm->def->numa) <= 1 &&
+ virDomainNumatuneGetMode(vm->def->numa, -1, &mem_mode) == 0 &&
+ mem_mode == VIR_DOMAIN_NUMATUNE_MEM_STRICT) {
+ if (virDomainNumatuneMaybeGetNodeset(vm->def->numa,
+ priv->autoNodeset,
+ &cpumapToSet,
+ -1) < 0)
+ goto cleanup;
+ } else if (vm->def->cputune.emulatorpin) {
+ cpumapToSet = vm->def->cputune.emulatorpin;
} else {
- VIR_DEBUG("Set CPU affinity with specified cpuset");
- if (vm->def->cpumask) {
- cpumapToSet = vm->def->cpumask;
- } else {
- /* You may think this is redundant, but we can't assume libvirtd
- * itself is running on all pCPUs, so we need to explicitly set
- * the spawned QEMU instance to all pCPUs if no map is given in
- * its config file */
- int hostcpus;
-
- if (virHostCPUHasBitmap()) {
- hostcpumap = virHostCPUGetOnlineBitmap();
- cpumap = virProcessGetAffinity(vm->pid);
- }
-
- if (hostcpumap && cpumap && virBitmapEqual(hostcpumap, cpumap)) {
- /* we're using all available CPUs, no reason to set
- * mask. If libvirtd is running without explicit
- * affinity, we can use hotplugged CPUs for this VM */
- ret = 0;
- goto cleanup;
- } else {
- /* setaffinity fails if you set bits for CPUs which
- * aren't present, so we have to limit ourselves */
- if ((hostcpus = virHostCPUGetCount()) < 0)
- goto cleanup;
-
- if (hostcpus > QEMUD_CPUMASK_LEN)
- hostcpus = QEMUD_CPUMASK_LEN;
-
- virBitmapFree(cpumap);
- if (!(cpumap = virBitmapNew(hostcpus)))
- goto cleanup;
-
- virBitmapSetAll(cpumap);
-
- cpumapToSet = cpumap;
- }
- }
+ if (qemuProcessGetAllCpuAffinity(&hostcpumap) < 0)
+ goto cleanup;
+ cpumapToSet = hostcpumap;
}
- if (virProcessSetAffinity(vm->pid, cpumapToSet) < 0)
+ if (cpumapToSet &&
+ virProcessSetAffinity(vm->pid, cpumapToSet) < 0)
goto cleanup;
ret = 0;
-
cleanup:
- virBitmapFree(cpumap);
- virBitmapFree(hostcpumap);
return ret;
}
#else /* !defined(HAVE_SCHED_GETAFFINITY) && !defined(HAVE_BSD_CPU_AFFINITY) */
@@ -2586,7 +2581,8 @@ qemuProcessSetupPid(virDomainObjPtr vm,
qemuDomainObjPrivatePtr priv = vm->privateData;
virDomainNumatuneMemMode mem_mode;
virCgroupPtr cgroup = NULL;
- virBitmapPtr use_cpumask;
+ virBitmapPtr use_cpumask = NULL;
+ VIR_AUTOPTR(virBitmap) hostcpumap = NULL;
char *mem_mask = NULL;
int ret = -1;
@@ -2598,12 +2594,21 @@ qemuProcessSetupPid(virDomainObjPtr vm,
}
/* Infer which cpumask shall be used. */
- if (cpumask)
+ if (cpumask) {
use_cpumask = cpumask;
- else if (vm->def->placement_mode == VIR_DOMAIN_CPU_PLACEMENT_MODE_AUTO)
+ } else if (vm->def->placement_mode == VIR_DOMAIN_CPU_PLACEMENT_MODE_AUTO) {
use_cpumask = priv->autoCpuset;
- else
+ } else if (vm->def->cpumask) {
use_cpumask = vm->def->cpumask;
+ } else {
+ /* You may think this is redundant, but we can't assume libvirtd
+ * itself is running on all pCPUs, so we need to explicitly set
+ * the spawned QEMU instance to all pCPUs if no map is given in
+ * its config file */
+ if (qemuProcessGetAllCpuAffinity(&hostcpumap) < 0)
+ goto cleanup;
+ use_cpumask = hostcpumap;
+ }
/*
* If CPU cgroup controller is not initialized here, then we need
@@ -2628,13 +2633,7 @@ qemuProcessSetupPid(virDomainObjPtr vm,
qemuSetupCgroupCpusetCpus(cgroup, use_cpumask) < 0)
goto cleanup;
- /*
- * Don't setup cpuset.mems for the emulator, they need to
- * be set up after initialization in order for kvm
- * allocations to succeed.
- */
- if (nameval != VIR_CGROUP_THREAD_EMULATOR &&
- mem_mask && virCgroupSetCpusetMems(cgroup, mem_mask) < 0)
+ if (mem_mask && virCgroupSetCpusetMems(cgroup, mem_mask) < 0)
goto cleanup;
}
@@ -6634,12 +6633,7 @@ qemuProcessLaunch(virConnectPtr conn,
/* This must be done after cgroup placement to avoid resetting CPU
* affinity */
- if (!vm->def->cputune.emulatorpin &&
- qemuProcessInitCpuAffinity(vm) < 0)
- goto cleanup;
-
- VIR_DEBUG("Setting emulator tuning/settings");
- if (qemuProcessSetupEmulator(vm) < 0)
+ if (qemuProcessInitCpuAffinity(vm) < 0)
goto cleanup;
VIR_DEBUG("Setting cgroup for external devices (if required)");
@@ -6708,10 +6702,6 @@ qemuProcessLaunch(virConnectPtr conn,
if (qemuProcessUpdateAndVerifyCPU(driver, vm, asyncJob) < 0)
goto cleanup;
- VIR_DEBUG("Setting up post-init cgroup restrictions");
- if (qemuSetupCpusetMems(vm) < 0)
- goto cleanup;
-
VIR_DEBUG("setting up hotpluggable cpus");
if (qemuDomainHasHotpluggableStartupVcpus(vm->def)) {
if (qemuDomainRefreshVcpuInfo(driver, vm, asyncJob, false) < 0)
@@ -6737,6 +6727,10 @@ qemuProcessLaunch(virConnectPtr conn,
if (qemuProcessDetectIOThreadPIDs(driver, vm, asyncJob) < 0)
goto cleanup;
+ VIR_DEBUG("Setting emulator tuning/settings");
+ if (qemuProcessSetupEmulator(vm) < 0)
+ goto cleanup;
+
VIR_DEBUG("Setting global CPU cgroup (if required)");
if (qemuSetupGlobalCpuCgroup(vm) < 0)
goto cleanup;
--
2.19.2
5 years, 10 months
[libvirt] [PATCH 0/4] qemu: Add PCI support for RISC-V guests
by Andrea Bolognani
Now that the QEMU part has been merged, it's time to make the
feature available to libvirt users as well.
Andrea Bolognani (4):
tests: Add capabilities data for QEMU 4.0.0 on RISC-V
qemu: Add PCI support for RISC-V guests
tests: Add test for PCI usage on RISC-V
news: Update for PCI support on RISC-V
docs/news.xml | 10 +
src/qemu/qemu_capabilities.c | 4 +-
src/qemu/qemu_domain.c | 2 +
src/qemu/qemu_domain_address.c | 3 +-
.../caps_4.0.0.riscv32.replies | 17625 ++++++++++++++++
.../caps_4.0.0.riscv32.xml | 180 +
.../caps_4.0.0.riscv64.replies | 17625 ++++++++++++++++
.../caps_4.0.0.riscv64.xml | 180 +
tests/qemucapabilitiestest.c | 2 +
.../caps_4.0.0.riscv32.xml | 25 +
.../caps_4.0.0.riscv64.xml | 25 +
tests/qemucaps2xmltest.c | 2 +
.../riscv64-virt-headless.riscv64-latest.args | 3 +-
tests/qemuxml2argvdata/riscv64-virt-pci.args | 27 +
tests/qemuxml2argvdata/riscv64-virt-pci.xml | 24 +
tests/qemuxml2argvtest.c | 2 +
tests/qemuxml2xmloutdata/riscv64-virt-pci.xml | 28 +
tests/qemuxml2xmltest.c | 2 +
18 files changed, 35766 insertions(+), 3 deletions(-)
create mode 100644 tests/qemucapabilitiesdata/caps_4.0.0.riscv32.replies
create mode 100644 tests/qemucapabilitiesdata/caps_4.0.0.riscv32.xml
create mode 100644 tests/qemucapabilitiesdata/caps_4.0.0.riscv64.replies
create mode 100644 tests/qemucapabilitiesdata/caps_4.0.0.riscv64.xml
create mode 100644 tests/qemucaps2xmloutdata/caps_4.0.0.riscv32.xml
create mode 100644 tests/qemucaps2xmloutdata/caps_4.0.0.riscv64.xml
create mode 100644 tests/qemuxml2argvdata/riscv64-virt-pci.args
create mode 100644 tests/qemuxml2argvdata/riscv64-virt-pci.xml
create mode 100644 tests/qemuxml2xmloutdata/riscv64-virt-pci.xml
--
2.20.1
5 years, 10 months
[libvirt] add ivshmem property master
by 吴 雨霖
[Problem Description]
I read qemu-doc.texi in the qemu-master source code, which has the following explanation of migrating ivshmem:
“With device property @option{master=on}, the guest will copy the shared memory on migration to the destination host. With @option{master=off}, the guest will not be able to migrate with the device attached.”
However, the libvirt library cannot recognize the property “master=on”. When I directly used the command "qemu-kvm -device ivshmem-plain,id=shmem0,memdev=shmmem-shmem0,master=on,bus=pci.0,addr=0xa” to launch a guest, qemu supported the ivshmem property master=on.
So, I suggest adding code to libvirt to support the property master=on.
[Code Review]
The following is the relevant part of the libvirt source code (domain_conf.h); note there is currently no field here for the ivshmem master property.
domain_conf.h
struct _virDomainShmemDef {
char *name;
unsigned long long size;
int model; /* enum virDomainShmemModel */
struct {
bool enabled;
virDomainChrSourceDef chr;
} server;
struct {
bool enabled;
unsigned vectors;
virTristateSwitch ioeventfd;
} msi;
virDomainDeviceInfo info;
};
[changed code]
src/conf/domain_conf.c
src/conf/domain_conf.h
[Detail of Source Code Modification]
diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c
index 9f75dc4..b41be37 100644
--- a/src/conf/domain_conf.c
+++ b/src/conf/domain_conf.c
@@ -14665,7 +14665,6 @@ virDomainShmemDefParseXML(virDomainXMLOptionPtr xmlopt,
xmlNodePtr save = ctxt->node;
xmlNodePtr server = NULL;
-
if (VIR_ALLOC(def) < 0)
return NULL;
-
if (VIR_ALLOC(def) < 0)
return NULL;
@@ -14685,12 +14684,28 @@ virDomainShmemDefParseXML(virDomainXMLOptionPtr xmlopt,
VIR_FREE(tmp);
}
+
if (!(def->name = virXMLPropString(node, "name"))) {
virReportError(VIR_ERR_XML_ERROR, "%s",
_("shmem element must contain 'name' attribute"));
goto cleanup;
}
+
+ if ((tmp = virXMLPropString(node, "master"))) {
+ int val;
+
+ if ((val = virTristateSwitchTypeFromString(tmp)) <= 0) {
+ virReportError(VIR_ERR_XML_ERROR,
+ _("invalid ivshmem master setting for shmem: '%s'"),
+ tmp);
+ goto cleanup;
+ }
+ def->master = val;
+ VIR_FREE(tmp);
+ }
+
+
if (virDomainParseScaledValue("./size[1]", NULL, ctxt,
&def->size, 1, ULLONG_MAX, false) < 0)
goto cleanup;
diff --git a/src/conf/domain_conf.h b/src/conf/domain_conf.h
index f1e6e4e..615d721 100644
--- a/src/conf/domain_conf.h
+++ b/src/conf/domain_conf.h
@@ -1727,8 +1727,10 @@ typedef enum {
struct _virDomainShmemDef {
char *name;
+
unsigned long long size;
int model; /* enum virDomainShmemModel */
+ virTristateSwitch master;
struct {
bool enabled;
virDomainChrSourceDef chr;
}
5 years, 10 months
[libvirt] [cim PATCH] Ensure nul termination of hostname
by Daniel P. Berrangé
Newest GCC warns that the string copying is potentially truncated and
thus not nul terminated.
In file included from /usr/include/string.h:494,
from ../../src/Virt_HostSystem.c:23:
In function ‘strncpy’,
inlined from ‘resolve_host’ at ../../src/Virt_HostSystem.c:55:28,
inlined from ‘get_fqdn’ at ../../src/Virt_HostSystem.c:92:23,
inlined from ‘set_host_system_properties’ at ../../src/Virt_HostSystem.c:109:13:
/usr/include/bits/string_fortified.h:106:10: error: ‘__builtin_strncpy’ specified bound 256 equals destination size [-Werror=stringop-truncation]
106 | return __builtin___strncpy_chk (__dest, __src, __len, __bos (__dest));
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
In function ‘strncpy’,
inlined from ‘resolve_host’ at ../../src/Virt_HostSystem.c:67:17,
inlined from ‘get_fqdn’ at ../../src/Virt_HostSystem.c:92:23,
inlined from ‘set_host_system_properties’ at ../../src/Virt_HostSystem.c:109:13:
/usr/include/bits/string_fortified.h:106:10: error: ‘__builtin_strncpy’ specified bound 256 equals destination size [-Werror=stringop-truncation]
106 | return __builtin___strncpy_chk (__dest, __src, __len, __bos (__dest));
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
cc1: all warnings being treated as errors
Signed-off-by: Daniel P. Berrangé <berrange(a)redhat.com>
---
src/Virt_HostSystem.c | 30 +++++++++++++++---------------
1 file changed, 15 insertions(+), 15 deletions(-)
diff --git a/src/Virt_HostSystem.c b/src/Virt_HostSystem.c
index ebe8184..5bc52ca 100644
--- a/src/Virt_HostSystem.c
+++ b/src/Virt_HostSystem.c
@@ -38,7 +38,7 @@
const static CMPIBroker *_BROKER;
-static int resolve_host(char *host, char *buf, int size)
+static int resolve_host(char *host, int size)
{
struct hostent *he;
int i;
@@ -52,7 +52,8 @@ static int resolve_host(char *host, char *buf, int size)
for (i = 0; he->h_aliases[i] != NULL; i++) {
if ((strchr(he->h_aliases[i], '.') != NULL) &&
(strstr(he->h_aliases[i], "localhost") == NULL)) {
- strncpy(buf, he->h_aliases[i], size);
+ strncpy(host, he->h_aliases[i], size - 1);
+ host[size - 1] = '\0';
return 0;
}
}
@@ -63,12 +64,13 @@ static int resolve_host(char *host, char *buf, int size)
// but also be sure the value isn't empty and that it doesn't
// contain "localhost"
if ((he->h_name != NULL) && (!STREQC(he->h_name, "")) &&
- (strstr(he->h_name, "localhost") == NULL))
- strncpy(buf, he->h_name, size);
- else if ((host != NULL) && (!STREQC(host, "")) &&
- (strstr(host, "localhost") == NULL))
- strncpy(buf, host, size);
- else {
+ (strstr(he->h_name, "localhost") == NULL)) {
+ strncpy(host, he->h_name, size - 1);
+ host[size - 1] = '\0';
+ } else if ((host != NULL) && (!STREQC(host, "")) &&
+ (strstr(host, "localhost") == NULL)) {
+ return 0;
+ } else {
CU_DEBUG("Unable to find valid hostname value.");
return -1;
}
@@ -76,20 +78,18 @@ static int resolve_host(char *host, char *buf, int size)
return 0;
}
-static int get_fqdn(char *buf, int size)
+static int get_fqdn(char *host, int size)
{
- char host[256];
int ret = 0;
- if (gethostname(host, sizeof(host)) != 0) {
+ if (gethostname(host, size) != 0) {
CU_DEBUG("gethostname(): %m");
return -1;
}
- if (strchr(host, '.') != NULL)
- strncpy(buf, host, size);
- else
- ret = resolve_host(host, buf, size);
+ if (strchr(host, '.') == NULL) {
+ ret = resolve_host(host, size);
+ }
return ret;
}
--
2.20.1
5 years, 10 months
[libvirt] [cim PATCH] Update description of DMTF schema distribution terms
by Daniel P. Berrangé
The schema files that we actually download & bundle in the tar.gz dist
have removed the clause "for uses consistent with this purpose" which
is good because that clause might be considered a distribution
restriction which could make it a non-free license.
Signed-off-by: Daniel P. Berrangé <berrange(a)redhat.com>
---
base_schema/README.DMTF | 5 ++---
1 file changed, 2 insertions(+), 3 deletions(-)
diff --git a/base_schema/README.DMTF b/base_schema/README.DMTF
index 3e6b164..66da62d 100644
--- a/base_schema/README.DMTF
+++ b/base_schema/README.DMTF
@@ -4,6 +4,5 @@ distribution per the guidelines at the top of the main MOF file:
// DMTF is a not-for-profit association of industry members dedicated
// to promoting enterprise and systems management and interoperability.
-// DMTF specifications and documents may be reproduced for uses
-// consistent with this purpose by members and non-members,
-// provided that correct attribution is given.
+// DMTF specifications and documents may be reproduced by
+// members and non-members, provided that correct attribution is given.
--
2.20.1
5 years, 10 months
[libvirt] add ivshmem property master
by 吴 雨霖
[Problem Description]
I read qemu-doc.texi in the qemu-master source code, which has the following explanation of migrating ivshmem:
“With device property @option{master=on}, the guest will copy the shared memory on migration to the destination host. With @option{master=off}, the guest will not be able to migrate with the device attached.”
However, the libvirt library cannot recognize the property “master=on”. When I directly used the command "qemu-kvm -device ivshmem-plain,id=shmem0,memdev=shmmem-shmem0,master=on,bus=pci.0,addr=0xa” to launch a guest, qemu supported the ivshmem property master=on.
So, I suggest adding code to libvirt to support the property master=on.
[Code Review]
The following is the relevant part of the libvirt source code (domain_conf.h); note there is currently no field here for the ivshmem master property.
domain_conf.h
struct _virDomainShmemDef {
char *name;
unsigned long long size;
int model; /* enum virDomainShmemModel */
struct {
bool enabled;
virDomainChrSourceDef chr;
} server;
struct {
bool enabled;
unsigned vectors;
virTristateSwitch ioeventfd;
} msi;
virDomainDeviceInfo info;
};
[changed code]
src/conf/domain_conf.c
src/conf/domain_conf.h
[Detail of Source Code Modification]
src/conf/domain_conf.h
[cid:image001.png@01D4BA48.57FBFA10]
src/conf/domain_conf.c
[cid:image002.png@01D4BA48.5B8017C0]
5 years, 10 months