KVM added the ability to get the thread ID for each vCPU via the monitor:
(qemu) info cpus
* CPU #0: pc=0x00000000000ffff0 thread_id=11463
CPU #1: pc=0x00000000fffffff0 thread_id=11464
CPU #2: pc=0x00000000fffffff0 thread_id=11465
With this we have enough information to support vCPU pinning in the QEMU
driver for KVM. For QEMU/KQEMU it is trivial, since they have only a
single thread.

The following patch implements vCPU pinning and fetching of CPU affinity
information. In this example I pin one of the two vCPUs in a guest:
[berrange@t60wlan libvirt-numa]$ ./src/virsh --connect qemu:///system start VirtTest
Domain VirtTest started
[berrange@t60wlan libvirt-numa]$ ./src/virsh --connect qemu:///system vcpuinfo VirtTest
VCPU: 0
CPU: 0
State: running
CPU Affinity: yy
VCPU: 1
CPU: 0
State: running
CPU Affinity: yy
[berrange@t60wlan libvirt-numa]$ ./src/virsh --connect qemu:///system vcpupin VirtTest 1 0
[berrange@t60wlan libvirt-numa]$ ./src/virsh --connect qemu:///system vcpuinfo VirtTest
VCPU: 0
CPU: 0
State: running
CPU Affinity: yy
VCPU: 1
CPU: 0
State: running
CPU Affinity: y-
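
For reference, the same operation through the public API looks roughly like
the sketch below. This is not part of the patch; it is just a hand-written
client that builds a cpumap and calls virDomainPinVcpu(), assuming the
VIR_CPU_MAPLEN convenience macro from libvirt.h and reusing the "VirtTest"
domain name from the transcript above:

/* Hedged sketch only, not part of the patch: pin vCPU 1 of the guest
 * to host pCPU 0 through the public API. Error checking is trimmed. */
#include <stdio.h>
#include <stdlib.h>
#include <libvirt/libvirt.h>

int main(void) {
    virConnectPtr conn = virConnectOpen("qemu:///system");
    virDomainPtr dom = virDomainLookupByName(conn, "VirtTest");
    virNodeInfo nodeinfo;
    unsigned char *cpumap;
    int maplen;

    virNodeGetInfo(conn, &nodeinfo);
    maplen = VIR_CPU_MAPLEN(nodeinfo.cpus);   /* bytes needed for one map */
    cpumap = calloc(maplen, 1);
    cpumap[0] |= 1 << 0;                      /* bit 0 == host pCPU 0 */

    if (virDomainPinVcpu(dom, 1, cpumap, maplen) < 0)
        fprintf(stderr, "pinning not supported for this guest\n");

    free(cpumap);
    virDomainFree(dom);
    virConnectClose(conn);
    return 0;
}

(Built with gcc and -lvirt; the bitmap layout matches what the driver code
below unpacks byte-by-byte.)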
This is implemented using sched_setaffinity()/sched_getaffinity(), which are
Linux-specific; there doesn't appear to be a portable process-affinity API
in POSIX.
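
For anyone unfamiliar with those calls, this is roughly the pattern involved;
a minimal Linux-only sketch (not from the patch, and the helper name is just
illustrative) of pinning a thread ID such as one reported by 'info cpus' to
host CPU 0 and reading the mask back:

/* Minimal sketch (not from the patch) of the Linux-specific calls. */
#define _GNU_SOURCE
#include <sched.h>
#include <sys/types.h>

static int pin_tid_to_cpu0(pid_t tid) {
    cpu_set_t mask;

    CPU_ZERO(&mask);
    CPU_SET(0, &mask);                           /* allow host pCPU 0 only */
    if (sched_setaffinity(tid, sizeof(mask), &mask) < 0)
        return -1;

    CPU_ZERO(&mask);
    if (sched_getaffinity(tid, sizeof(mask), &mask) < 0)
        return -1;
    return CPU_ISSET(0, &mask) ? 0 : -1;         /* verify the new mask */
}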
If the KVM instance does not provide the 'thread_id' data in 'info cpus',
we simply report a suitable error message when pinning is attempted. We
detect the vCPU-to-thread mapping at guest startup and cache it thereafter.
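
The probe itself amounts to checking whether the 'info cpus' reply carries
any thread_id= tokens; in the patch that falls out of the parse loop, but as
a standalone illustration (function name is mine, not from the patch):

/* Illustrative only: if the monitor reply has no thread_id= data, the
 * driver leaves vcpupids unset and pinning is reported as unsupported
 * rather than failing guest startup. */
#include <string.h>

static int monitor_reports_thread_ids(const char *info_cpus_reply) {
    return strstr(info_cpus_reply, "thread_id=") != NULL;
}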
Dan.
diff -r 0f537442ce97 src/qemu_conf.h
--- a/src/qemu_conf.h Fri May 16 16:09:57 2008 -0400
+++ b/src/qemu_conf.h Fri May 16 17:39:29 2008 -0400
@@ -328,6 +328,9 @@
int *tapfds;
int ntapfds;
+ int nvcpupids;
+ int *vcpupids;
+
int qemuVersion;
int qemuCmdFlags; /* values from enum qemud_cmd_flags */
diff -r 0f537442ce97 src/qemu_driver.c
--- a/src/qemu_driver.c Fri May 16 16:09:57 2008 -0400
+++ b/src/qemu_driver.c Fri May 16 17:39:29 2008 -0400
@@ -61,6 +61,7 @@
#include "nodeinfo.h"
#include "stats_linux.h"
#include "capabilities.h"
+#include "memory.h"
static int qemudShutdown(void);
@@ -118,6 +119,10 @@
struct qemud_network *network);
static int qemudDomainGetMaxVcpus(virDomainPtr dom);
+static int qemudMonitorCommand (const struct qemud_driver *driver,
+ const struct qemud_vm *vm,
+ const char *cmd,
+ char **reply);
static struct qemud_driver *qemu_driver = NULL;
@@ -608,6 +613,106 @@
return ret;
}
+static int
+qemudDetectVcpuPIDs(virConnectPtr conn,
+ struct qemud_driver *driver,
+ struct qemud_vm *vm) {
+ char *qemucpus = NULL;
+ char *line;
+ int lastVcpu = -1;
+
+ /* Only KVM has separate threads for CPUs,
+ others just use main QEMU process for CPU */
+ if (vm->def->virtType != QEMUD_VIRT_KVM)
+ vm->nvcpupids = 1;
+ else
+ vm->nvcpupids = vm->def->vcpus;
+
+ if (VIR_ALLOC_N(vm->vcpupids, vm->nvcpupids) < 0) {
+ qemudReportError(conn, NULL, NULL, VIR_ERR_NO_MEMORY,
+ "%s", _("allocate cpumap"));
+ return -1;
+ }
+
+ if (vm->def->virtType != QEMUD_VIRT_KVM) {
+ vm->vcpupids[0] = vm->pid;
+ return 0;
+ }
+
+ if (qemudMonitorCommand(driver, vm, "info cpus", &qemucpus) < 0) {
+ qemudReportError(conn, NULL, NULL, VIR_ERR_INTERNAL_ERROR,
+ "%s", _("cannot run monitor command to fetch CPU
thread info"));
+ VIR_FREE(vm->vcpupids);
+ vm->nvcpupids = 0;
+ return -1;
+ }
+
+ /*
+ * This is the gross format we're about to parse :-{
+ *
+ * (qemu) info cpus
+ * * CPU #0: pc=0x00000000000f0c4a thread_id=30019
+ * CPU #1: pc=0x00000000fffffff0 thread_id=30020
+ * CPU #2: pc=0x00000000fffffff0 thread_id=30021
+ *
+ */
+ line = qemucpus;
+ do {
+ char *offset = strchr(line, '#');
+ char *end = NULL;
+ int vcpu = 0, tid = 0;
+
+ /* See if we're all done */
+ if (offset == NULL)
+ break;
+
+ /* Extract VCPU number */
+ if (virStrToLong_i(offset + 1, &end, 10, &vcpu) < 0)
+ goto error;
+ if (end == NULL || *end != ':')
+ goto error;
+
+ /* Extract host Thread ID */
+ if ((offset = strstr(line, "thread_id=")) == NULL)
+ goto error;
+ if (virStrToLong_i(offset + strlen("thread_id="), &end, 10, &tid) < 0)
+ goto error;
+ if (end == NULL || !c_isspace(*end))
+ goto error;
+
+ /* Validate the VCPU is in expected range & order */
+ if (vcpu >= vm->nvcpupids ||
+ vcpu != (lastVcpu + 1))
+ goto error;
+
+ lastVcpu = vcpu;
+ vm->vcpupids[vcpu] = tid;
+
+ /* Skip to next data line */
+ line = strchr(offset, '\r');
+ if (line == NULL)
+ line = strchr(offset, '\n');
+ } while (line != NULL);
+
+ /* Validate we got data for all VCPUs we expected */
+ if (lastVcpu != (vm->def->vcpus - 1))
+ goto error;
+
+ free(qemucpus);
+ return 0;
+
+error:
+ VIR_FREE(vm->vcpupids);
+ vm->nvcpupids = 0;
+ free(qemucpus);
+
+ /* Explicitly return success, not error. Older KVM does
+ not have vCPU -> Thread mapping info and we don't
+ want to break its use. This merely disables ability
+ to pin vCPUS with libvirt */
+ return 0;
+}
+
static int qemudNextFreeVNCPort(struct qemud_driver *driver ATTRIBUTE_UNUSED) {
int i;
@@ -785,6 +890,11 @@
qemudShutdownVMDaemon(conn, driver, vm);
return -1;
}
+
+ if (qemudDetectVcpuPIDs(conn, driver, vm) < 0) {
+ qemudShutdownVMDaemon(conn, driver, vm);
+ return -1;
+ }
}
return ret;
@@ -857,6 +967,9 @@
vm->pid = -1;
vm->id = -1;
vm->state = VIR_DOMAIN_SHUTOFF;
+ free(vm->vcpupids);
+ vm->vcpupids = NULL;
+ vm->nvcpupids = 0;
if (vm->newDef) {
qemudFreeVMDef(vm->def);
@@ -2271,6 +2384,127 @@
vm->def->vcpus = nvcpus;
return 0;
+}
+
+
+static int
+qemudDomainPinVcpu(virDomainPtr dom,
+ unsigned int vcpu,
+ unsigned char *cpumap,
+ int maplen) {
+ struct qemud_driver *driver = (struct qemud_driver *)dom->conn->privateData;
+ struct qemud_vm *vm = qemudFindVMByUUID(driver, dom->uuid);
+ cpu_set_t mask;
+ int i, maxcpu;
+ virNodeInfo nodeinfo;
+
+ if (!qemudIsActiveVM(vm)) {
+ qemudReportError(dom->conn, dom, NULL, VIR_ERR_INVALID_ARG,
+ "%s",_("cannot pin vcpus on an inactive domain"));
+ return -1;
+ }
+
+ if (vcpu > (vm->nvcpupids-1)) {
+ qemudReportError(dom->conn, dom, NULL, VIR_ERR_INVALID_ARG,
+ _("vcpu number out of range %d > %d"),
+ vcpu, vm->nvcpupids);
+ return -1;
+ }
+
+ if (virNodeInfoPopulate(dom->conn, &nodeinfo) < 0)
+ return -1;
+
+ maxcpu = maplen * 8;
+ if (maxcpu > nodeinfo.cpus)
+ maxcpu = nodeinfo.cpus;
+
+ CPU_ZERO(&mask);
+ for (i = 0 ; i < maxcpu ; i++) {
+ if ((cpumap[i/8] >> (i % 8)) & 1)
+ CPU_SET(i, &mask);
+ }
+
+ if (vm->vcpupids != NULL) {
+ if (sched_setaffinity(vm->vcpupids[vcpu], sizeof(mask), &mask) < 0) {
+ qemudReportError(dom->conn, dom, NULL, VIR_ERR_INVALID_ARG,
+ _("cannot set affinity: %s"), strerror(errno));
+ return -1;
+ }
+ } else {
+ qemudReportError(dom->conn, dom, NULL, VIR_ERR_NO_SUPPORT,
+ "%s", _("cpu affinity is not supported"));
+ return -1;
+ }
+
+ return 0;
+}
+
+static int
+qemudDomainGetVcpus(virDomainPtr dom,
+ virVcpuInfoPtr info,
+ int maxinfo,
+ unsigned char *cpumaps,
+ int maplen) {
+ struct qemud_driver *driver = (struct qemud_driver *)dom->conn->privateData;
+ struct qemud_vm *vm = qemudFindVMByUUID(driver, dom->uuid);
+ virNodeInfo nodeinfo;
+ int i, v, maxcpu;
+
+ if (!qemudIsActiveVM(vm)) {
+ qemudReportError(dom->conn, dom, NULL, VIR_ERR_INVALID_ARG,
+ "%s",_("cannot fetch vcpu info for an inactive domain"));
+ return -1;
+ }
+
+ if (virNodeInfoPopulate(dom->conn, &nodeinfo) < 0)
+ return -1;
+
+ maxcpu = maplen * 8;
+ if (maxcpu > nodeinfo.cpus)
+ maxcpu = nodeinfo.cpus;
+
+ /* Clamp to actual number of vcpus */
+ if (maxinfo > vm->nvcpupids)
+ maxinfo = vm->nvcpupids;
+
+ if (maxinfo < 1)
+ return 0;
+
+ if (info != NULL) {
+ memset(info, 0, sizeof(*info) * maxinfo);
+ for (i = 0 ; i < maxinfo ; i++) {
+ info[i].number = i;
+ info[i].state = VIR_VCPU_RUNNING;
+ /* XXX cpu time, current pCPU mapping */
+ }
+ }
+
+ if (cpumaps != NULL) {
+ memset(cpumaps, 0, maplen * maxinfo);
+ if (vm->vcpupids != NULL) {
+ for (v = 0 ; v < maxinfo ; v++) {
+ cpu_set_t mask;
+ unsigned char *cpumap = VIR_GET_CPUMAP(cpumaps, maplen, v);
+ CPU_ZERO(&mask);
+
+ if (sched_getaffinity(vm->vcpupids[v], sizeof(mask), &mask) < 0) {
+ qemudReportError(dom->conn, dom, NULL, VIR_ERR_INVALID_ARG,
+ _("cannot get affinity: %s"), strerror(errno));
+ return -1;
+ }
+
+ for (i = 0 ; i < maxcpu ; i++)
+ if (CPU_ISSET(i, &mask))
+ VIR_USE_CPU(cpumap, i);
+ }
+ } else {
+ qemudReportError(dom->conn, dom, NULL, VIR_ERR_NO_SUPPORT,
+ "%s", _("cpu affinity is not available"));
+ return -1;
+ }
+ }
+
+ return maxinfo;
}
static int qemudDomainGetMaxVcpus(virDomainPtr dom) {
@@ -3221,8 +3455,8 @@
qemudDomainRestore, /* domainRestore */
NULL, /* domainCoreDump */
qemudDomainSetVcpus, /* domainSetVcpus */
- NULL, /* domainPinVcpu */
- NULL, /* domainGetVcpus */
+ qemudDomainPinVcpu, /* domainPinVcpu */
+ qemudDomainGetVcpus, /* domainGetVcpus */
qemudDomainGetMaxVcpus, /* domainGetMaxVcpus */
qemudDomainDumpXML, /* domainDumpXML */
qemudListDefinedDomains, /* listDomains */
--
|: Red Hat, Engineering, Boston -o- http://people.redhat.com/berrange/ :|
|: http://libvirt.org -o- http://virt-manager.org -o- http://ovirt.org :|
|: http://autobuild.org -o- http://search.cpan.org/~danberr/ :|
|: GnuPG: 7D3B9505 -o- F3C9 553F A1DA 4AC2 5648 23C1 B3DF F742 7D3B 9505 :|