[libvirt] [PATCH 0/7] Probe QEMU binary for host CPU and use it for computations

Since QEMU and kvm may filter some host CPU features or add efficiently emulated features, asking QEMU binary for host CPU data provides better results when we later use the data for building guest CPUs. Jiri Denemark (7): cpu: Add support for loading and storing CPU data cpu: Export few x86-specific APIs x86: Ignore CPUID functions greater than 10 qemu: Add monitor APIs to fetch CPUID data from QEMU qemu: Make QMP probing process reusable qemu: Probe QEMU binary for host CPU qemu: Use host CPU from QEMU for computations src/cpu/cpu.c | 41 ++++ src/cpu/cpu.h | 13 ++ src/cpu/cpu_x86.c | 161 +++++++++++--- src/cpu/cpu_x86.h | 10 + src/cpu/cpu_x86_data.h | 1 + src/libvirt_private.syms | 9 + src/qemu/qemu_capabilities.c | 234 ++++++++++++++------- src/qemu/qemu_capabilities.h | 2 + src/qemu/qemu_command.c | 32 ++- src/qemu/qemu_domain.c | 21 +- src/qemu/qemu_monitor.c | 21 ++ src/qemu/qemu_monitor.h | 3 + src/qemu/qemu_monitor_json.c | 162 ++++++++++++++ src/qemu/qemu_monitor_json.h | 6 + tests/Makefile.am | 1 + .../qemumonitorjson-getcpu-empty.data | 2 + .../qemumonitorjson-getcpu-empty.json | 46 ++++ .../qemumonitorjson-getcpu-filtered.data | 4 + .../qemumonitorjson-getcpu-filtered.json | 46 ++++ .../qemumonitorjson-getcpu-full.data | 4 + .../qemumonitorjson-getcpu-full.json | 46 ++++ .../qemumonitorjson-getcpu-host.data | 5 + .../qemumonitorjson-getcpu-host.json | 45 ++++ tests/qemumonitorjsontest.c | 74 +++++++ 24 files changed, 881 insertions(+), 108 deletions(-) create mode 100644 tests/qemumonitorjsondata/qemumonitorjson-getcpu-empty.data create mode 100644 tests/qemumonitorjsondata/qemumonitorjson-getcpu-empty.json create mode 100644 tests/qemumonitorjsondata/qemumonitorjson-getcpu-filtered.data create mode 100644 tests/qemumonitorjsondata/qemumonitorjson-getcpu-filtered.json create mode 100644 tests/qemumonitorjsondata/qemumonitorjson-getcpu-full.data create mode 100644 tests/qemumonitorjsondata/qemumonitorjson-getcpu-full.json create mode 100644 tests/qemumonitorjsondata/qemumonitorjson-getcpu-host.data create mode 100644 tests/qemumonitorjsondata/qemumonitorjson-getcpu-host.json -- 1.8.3.2

--- src/cpu/cpu.c | 41 ++++++++++++++ src/cpu/cpu.h | 13 +++++ src/cpu/cpu_x86.c | 135 +++++++++++++++++++++++++++++++++++++++-------- src/libvirt_private.syms | 2 + 4 files changed, 170 insertions(+), 21 deletions(-) diff --git a/src/cpu/cpu.c b/src/cpu/cpu.c index 4124354..8bd689e 100644 --- a/src/cpu/cpu.c +++ b/src/cpu/cpu.c @@ -438,6 +438,47 @@ cpuHasFeature(const virCPUDataPtr data, return driver->hasFeature(data, feature); } +char * +cpuDataFormat(const virCPUDataPtr data) +{ + struct cpuArchDriver *driver; + + VIR_DEBUG("data=%p", data); + + if (!(driver = cpuGetSubDriver(data->arch))) + return NULL; + + if (!driver->dataFormat) { + virReportError(VIR_ERR_NO_SUPPORT, + _("cannot format %s CPU data"), + virArchToString(data->arch)); + return NULL; + } + + return driver->dataFormat(data); +} + +virCPUDataPtr +cpuDataParse(virArch arch, + const char *xmlStr) +{ + struct cpuArchDriver *driver; + + VIR_DEBUG("arch=%s, xmlStr=%s", virArchToString(arch), xmlStr); + + if (!(driver = cpuGetSubDriver(arch))) + return NULL; + + if (!driver->dataParse) { + virReportError(VIR_ERR_NO_SUPPORT, + _("cannot parse %s CPU data"), + virArchToString(arch)); + return NULL; + } + + return driver->dataParse(xmlStr); +} + bool cpuModelIsAllowed(const char *model, const char **models, diff --git a/src/cpu/cpu.h b/src/cpu/cpu.h index 4003435..e1473fe 100644 --- a/src/cpu/cpu.h +++ b/src/cpu/cpu.h @@ -91,6 +91,11 @@ typedef int (*cpuArchHasFeature) (const virCPUDataPtr data, const char *feature); +typedef char * +(*cpuArchDataFormat)(const virCPUDataPtr data); + +typedef virCPUDataPtr +(*cpuArchDataParse) (const char *xmlStr); struct cpuArchDriver { const char *name; @@ -105,6 +110,8 @@ struct cpuArchDriver { cpuArchBaseline baseline; cpuArchUpdate update; cpuArchHasFeature hasFeature; + cpuArchDataFormat dataFormat; + cpuArchDataParse dataParse; }; @@ -165,6 +172,12 @@ extern int cpuHasFeature(const virCPUDataPtr data, const char *feature); +char * +cpuDataFormat(const virCPUDataPtr data); + +virCPUDataPtr +cpuDataParse(virArch arch, + const char *xmlStr); bool cpuModelIsAllowed(const char *model, diff --git a/src/cpu/cpu_x86.c b/src/cpu/cpu_x86.c index a388f0f..560a2a9 100644 --- a/src/cpu/cpu_x86.c +++ b/src/cpu/cpu_x86.c @@ -666,12 +666,42 @@ x86FeatureNames(const struct x86_map *map, static int +x86ParseCPUID(xmlXPathContextPtr ctxt, + struct cpuX86cpuid *cpuid) +{ + unsigned long fun, eax, ebx, ecx, edx; + int ret_fun, ret_eax, ret_ebx, ret_ecx, ret_edx; + + memset(cpuid, 0, sizeof(*cpuid)); + + fun = eax = ebx = ecx = edx = 0; + ret_fun = virXPathULongHex("string(@function)", ctxt, &fun); + ret_eax = virXPathULongHex("string(@eax)", ctxt, &eax); + ret_ebx = virXPathULongHex("string(@ebx)", ctxt, &ebx); + ret_ecx = virXPathULongHex("string(@ecx)", ctxt, &ecx); + ret_edx = virXPathULongHex("string(@edx)", ctxt, &edx); + + if (ret_fun < 0 || ret_eax == -2 || ret_ebx == -2 + || ret_ecx == -2 || ret_edx == -2) + return -1; + + cpuid->function = fun; + cpuid->eax = eax; + cpuid->ebx = ebx; + cpuid->ecx = ecx; + cpuid->edx = edx; + return 0; +} + + +static int x86FeatureLoad(xmlXPathContextPtr ctxt, struct x86_map *map) { xmlNodePtr *nodes = NULL; xmlNodePtr ctxt_node = ctxt->node; struct x86_feature *feature; + struct cpuX86cpuid cpuid; int ret = 0; size_t i; int n; @@ -697,31 +727,13 @@ x86FeatureLoad(xmlXPathContextPtr ctxt, goto ignore; for (i = 0; i < n; i++) { - struct cpuX86cpuid cpuid; - unsigned long fun, eax, ebx, ecx, edx; - int ret_fun, ret_eax, ret_ebx, ret_ecx, ret_edx; - ctxt->node = nodes[i]; - fun = eax = ebx = ecx = edx = 0; - ret_fun = virXPathULongHex("string(@function)", ctxt, &fun); - ret_eax = virXPathULongHex("string(@eax)", ctxt, &eax); - ret_ebx = virXPathULongHex("string(@ebx)", ctxt, &ebx); - ret_ecx = virXPathULongHex("string(@ecx)", ctxt, &ecx); - ret_edx = virXPathULongHex("string(@edx)", ctxt, &edx); - - if (ret_fun < 0 || ret_eax == -2 || ret_ebx == -2 - || ret_ecx == -2 || ret_edx == -2) { + if (x86ParseCPUID(ctxt, &cpuid) < 0) { virReportError(VIR_ERR_INTERNAL_ERROR, - _("Invalid cpuid[%zu] in %s feature"), i, feature->name); + _("Invalid cpuid[%zu] in %s feature"), + i, feature->name); goto ignore; } - - cpuid.function = fun; - cpuid.eax = eax; - cpuid.ebx = ebx; - cpuid.ecx = ecx; - cpuid.edx = edx; - if (x86DataAddCpuid(feature->data, &cpuid)) goto error; } @@ -1124,6 +1136,85 @@ error: } +static char * +x86CPUDataFormat(const virCPUDataPtr data) +{ + struct data_iterator iter = DATA_ITERATOR_INIT(data->data.x86); + struct cpuX86cpuid *cpuid; + virBuffer buf = VIR_BUFFER_INITIALIZER; + + virBufferAddLit(&buf, "<cpudata arch='x86'>\n"); + while ((cpuid = x86DataCpuidNext(&iter))) { + virBufferAsprintf(&buf, + " <cpuid function='0x%08x'" + " eax='0x%08x' ebx='0x%08x'" + " ecx='0x%08x' edx='0x%08x'/>\n", + cpuid->function, + cpuid->eax, cpuid->ebx, cpuid->ecx, cpuid->edx); + } + virBufferAddLit(&buf, "</cpudata>\n"); + + if (virBufferError(&buf)) { + virBufferFreeAndReset(&buf); + virReportOOMError(); + return NULL; + } + + return virBufferContentAndReset(&buf); +} + + +static virCPUDataPtr +x86CPUDataParse(const char *xmlStr) +{ + xmlDocPtr xml = NULL; + xmlXPathContextPtr ctxt = NULL; + xmlNodePtr *nodes = NULL; + virCPUDataPtr cpuData = NULL; + struct cpuX86Data *data = NULL; + struct cpuX86cpuid cpuid; + size_t i; + int n; + + if (VIR_ALLOC(data) < 0) + goto cleanup; + + if (!(xml = virXMLParseStringCtxt(xmlStr, _("CPU data"), &ctxt))) { + virReportError(VIR_ERR_INTERNAL_ERROR, "%s", + _("cannot parse CPU data")); + goto cleanup; + } + ctxt->node = xmlDocGetRootElement(xml); + + n = virXPathNodeSet("/cpudata[@arch='x86']/data", ctxt, &nodes); + if (n < 0) { + virReportError(VIR_ERR_INTERNAL_ERROR, "%s", + _("no x86 CPU data found")); + goto cleanup; + } + + for (i = 0; i < n; i++) { + ctxt->node = nodes[i]; + if (x86ParseCPUID(ctxt, &cpuid) < 0) { + virReportError(VIR_ERR_INTERNAL_ERROR, + _("failed to parse cpuid[%zu]"), i); + goto cleanup; + } + if (x86DataAddCpuid(data, &cpuid) < 0) + goto cleanup; + } + + cpuData = x86MakeCPUData(VIR_ARCH_X86_64, &data); + +cleanup: + VIR_FREE(nodes); + xmlXPathFreeContext(ctxt); + xmlFreeDoc(xml); + x86DataFree(data); + return cpuData; +} + + /* A helper macro to exit the cpu computation function without writing * redundant code: * MSG: error message @@ -1920,4 +2011,6 @@ struct cpuArchDriver cpuDriverX86 = { .baseline = x86Baseline, .update = x86Update, .hasFeature = x86HasFeature, + .dataFormat = x86CPUDataFormat, + .dataParse = x86CPUDataParse, }; diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms index 76873ad..e0f3876 100644 --- a/src/libvirt_private.syms +++ b/src/libvirt_private.syms @@ -712,7 +712,9 @@ cpuBaseline; cpuBaselineXML; cpuCompare; cpuCompareXML; +cpuDataFormat; cpuDataFree; +cpuDataParse; cpuDecode; cpuEncode; cpuGuestData; -- 1.8.3.2

On Tue, Jul 23, 2013 at 06:11:30PM +0200, Jiri Denemark wrote: Could use a more verbose commit message. Looking at later patches, I see cpuDataFormat used in the test suite, but I don't see cpuDataParse used anywhere ? If these are only for the test suite, then perhaps adding them to a cpupriv.h header is preferrable.
--- src/cpu/cpu.c | 41 ++++++++++++++ src/cpu/cpu.h | 13 +++++ src/cpu/cpu_x86.c | 135 +++++++++++++++++++++++++++++++++++++++-------- src/libvirt_private.syms | 2 + 4 files changed, 170 insertions(+), 21 deletions(-)
Daniel -- |: http://berrange.com -o- http://www.flickr.com/photos/dberrange/ :| |: http://libvirt.org -o- http://virt-manager.org :| |: http://autobuild.org -o- http://search.cpan.org/~danberr/ :| |: http://entangle-photo.org -o- http://live.gnome.org/gtk-vnc :|

On Tue, Jul 23, 2013 at 17:27:41 +0100, Daniel Berrange wrote:
On Tue, Jul 23, 2013 at 06:11:30PM +0200, Jiri Denemark wrote:
Could use a more verbose commit message.
Looking at later patches, I see cpuDataFormat used in the test suite, but I don't see cpuDataParse used anywhere ? If these are only for the test suite, then perhaps adding them to a cpupriv.h header is preferrable.
Yeah, cpupriv.h might be better since both APIs are really there just for our testsuite. Although I guess marking them as testsuite only in cpu.h would work too. And you're right, cpuDataParse is not used anywhere but I plan to use it in other cpu related tests in the future and having them both in a single patch seemed best to me. Jirka

--- src/cpu/cpu_x86.c | 21 ++++++++++++++++++--- src/cpu/cpu_x86.h | 10 ++++++++++ src/libvirt_private.syms | 7 +++++++ 3 files changed, 35 insertions(+), 3 deletions(-) diff --git a/src/cpu/cpu_x86.c b/src/cpu/cpu_x86.c index 560a2a9..dbbcfd2 100644 --- a/src/cpu/cpu_x86.c +++ b/src/cpu/cpu_x86.c @@ -203,7 +203,7 @@ x86DataCpuid(const struct cpuX86Data *data, } -static void +void x86DataFree(struct cpuX86Data *data) { if (data == NULL) @@ -215,7 +215,7 @@ x86DataFree(struct cpuX86Data *data) } -static virCPUDataPtr +virCPUDataPtr x86MakeCPUData(virArch arch, struct cpuX86Data **data) { virCPUDataPtr cpuData; @@ -295,7 +295,7 @@ x86DataExpand(struct cpuX86Data *data, } -static int +int x86DataAddCpuid(struct cpuX86Data *data, const struct cpuX86cpuid *cpuid) { @@ -323,6 +323,21 @@ x86DataAddCpuid(struct cpuX86Data *data, } +int +x86DataSetVendor(struct cpuX86Data *data, + const char *vendor) +{ + struct cpuX86cpuid cpuid; + + cpuid.function = 0; + cpuid.ebx = virReadBufInt32LE(vendor); + cpuid.edx = virReadBufInt32LE(vendor + 4); + cpuid.ecx = virReadBufInt32LE(vendor + 8); + + return x86DataAddCpuid(data, &cpuid); +} + + static int x86DataAdd(struct cpuX86Data *data1, const struct cpuX86Data *data2) diff --git a/src/cpu/cpu_x86.h b/src/cpu/cpu_x86.h index 77965b7..8235076 100644 --- a/src/cpu/cpu_x86.h +++ b/src/cpu/cpu_x86.h @@ -25,7 +25,17 @@ # define __VIR_CPU_X86_H__ # include "cpu.h" +# include "cpu_x86_data.h" extern struct cpuArchDriver cpuDriverX86; +int x86DataAddCpuid(struct cpuX86Data *data, + const struct cpuX86cpuid *cpuid); +int x86DataSetVendor(struct cpuX86Data *data, + const char *vendor); + +void x86DataFree(struct cpuX86Data *data); + +virCPUDataPtr x86MakeCPUData(virArch arch, struct cpuX86Data **data); + #endif /* __VIR_CPU_X86_H__ */ diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms index e0f3876..7fe2bc0 100644 --- a/src/libvirt_private.syms +++ b/src/libvirt_private.syms @@ -724,6 +724,13 @@ cpuNodeData; cpuUpdate; +# cpu/cpu_x86.h +x86DataAddCpuid; +x86DataFree; +x86DataSetVendor; +x86MakeCPUData; + + # datatypes.h virConnectClass; virDomainClass; -- 1.8.3.2

On Tue, Jul 23, 2013 at 06:11:31PM +0200, Jiri Denemark wrote: This is adding a new function as well as exporting some existing functions, so should probably be split in two.
--- src/cpu/cpu_x86.c | 21 ++++++++++++++++++--- src/cpu/cpu_x86.h | 10 ++++++++++ src/libvirt_private.syms | 7 +++++++ 3 files changed, 35 insertions(+), 3 deletions(-)
diff --git a/src/cpu/cpu_x86.c b/src/cpu/cpu_x86.c index 560a2a9..dbbcfd2 100644 --- a/src/cpu/cpu_x86.c +++ b/src/cpu/cpu_x86.c @@ -203,7 +203,7 @@ x86DataCpuid(const struct cpuX86Data *data, }
-static void +void x86DataFree(struct cpuX86Data *data)
These are kind of generic names to be exposing outside the file. It would be nice to update this file to use a better naming prefix for all its functions "virCPUx86DataNNNNN"
{ if (data == NULL) @@ -215,7 +215,7 @@ x86DataFree(struct cpuX86Data *data) }
-static virCPUDataPtr +virCPUDataPtr x86MakeCPUData(virArch arch, struct cpuX86Data **data) { virCPUDataPtr cpuData; @@ -295,7 +295,7 @@ x86DataExpand(struct cpuX86Data *data, }
-static int +int x86DataAddCpuid(struct cpuX86Data *data, const struct cpuX86cpuid *cpuid) { @@ -323,6 +323,21 @@ x86DataAddCpuid(struct cpuX86Data *data, }
+int +x86DataSetVendor(struct cpuX86Data *data, + const char *vendor) +{ + struct cpuX86cpuid cpuid; + + cpuid.function = 0; + cpuid.ebx = virReadBufInt32LE(vendor); + cpuid.edx = virReadBufInt32LE(vendor + 4); + cpuid.ecx = virReadBufInt32LE(vendor + 8); + + return x86DataAddCpuid(data, &cpuid); +} + + static int x86DataAdd(struct cpuX86Data *data1, const struct cpuX86Data *data2) diff --git a/src/cpu/cpu_x86.h b/src/cpu/cpu_x86.h index 77965b7..8235076 100644 --- a/src/cpu/cpu_x86.h +++ b/src/cpu/cpu_x86.h @@ -25,7 +25,17 @@ # define __VIR_CPU_X86_H__
# include "cpu.h" +# include "cpu_x86_data.h"
extern struct cpuArchDriver cpuDriverX86;
+int x86DataAddCpuid(struct cpuX86Data *data, + const struct cpuX86cpuid *cpuid); +int x86DataSetVendor(struct cpuX86Data *data, + const char *vendor); + +void x86DataFree(struct cpuX86Data *data); + +virCPUDataPtr x86MakeCPUData(virArch arch, struct cpuX86Data **data); + #endif /* __VIR_CPU_X86_H__ */ diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms index e0f3876..7fe2bc0 100644 --- a/src/libvirt_private.syms +++ b/src/libvirt_private.syms @@ -724,6 +724,13 @@ cpuNodeData; cpuUpdate;
+# cpu/cpu_x86.h +x86DataAddCpuid; +x86DataFree; +x86DataSetVendor; +x86MakeCPUData; + + # datatypes.h virConnectClass; virDomainClass;
Regards, Daniel -- |: http://berrange.com -o- http://www.flickr.com/photos/dberrange/ :| |: http://libvirt.org -o- http://virt-manager.org :| |: http://autobuild.org -o- http://search.cpan.org/~danberr/ :| |: http://entangle-photo.org -o- http://live.gnome.org/gtk-vnc :|

We don't need to store any CPUID data for function we know nothing about. However, this limit may need to be increased in the future when libvirt learns features described by a CPUID function greater than 10. The comparison is done after subtracting high-bits prefix, so currently functions 0x00000000 to 0x0000000a and 0x80000000 to 0x8000000a are stored. --- src/cpu/cpu_x86.c | 5 +++++ src/cpu/cpu_x86_data.h | 1 + 2 files changed, 6 insertions(+) diff --git a/src/cpu/cpu_x86.c b/src/cpu/cpu_x86.c index dbbcfd2..bd3f2b0 100644 --- a/src/cpu/cpu_x86.c +++ b/src/cpu/cpu_x86.c @@ -314,6 +314,11 @@ x86DataAddCpuid(struct cpuX86Data *data, cpuids = &data->extended; } + if (pos > CPUX86_MAX_FUNCTION) { + VIR_DEBUG("Ignoring unhandled function 0x%x", cpuid->function); + return 0; + } + if (x86DataExpand(data, basic_by, extended_by) < 0) return -1; diff --git a/src/cpu/cpu_x86_data.h b/src/cpu/cpu_x86_data.h index dc972a6..af470e4 100644 --- a/src/cpu/cpu_x86_data.h +++ b/src/cpu/cpu_x86_data.h @@ -36,6 +36,7 @@ struct cpuX86cpuid { # define CPUX86_BASIC 0x0 # define CPUX86_EXTENDED 0x80000000 +# define CPUX86_MAX_FUNCTION 10 struct cpuX86Data { size_t basic_len; -- 1.8.3.2

On Tue, Jul 23, 2013 at 06:11:32PM +0200, Jiri Denemark wrote:
We don't need to store any CPUID data for function we know nothing about. However, this limit may need to be increased in the future when libvirt learns features described by a CPUID function greater than 10. The comparison is done after subtracting high-bits prefix, so currently functions 0x00000000 to 0x0000000a and 0x80000000 to 0x8000000a are stored. --- src/cpu/cpu_x86.c | 5 +++++ src/cpu/cpu_x86_data.h | 1 + 2 files changed, 6 insertions(+)
ACK Daniel -- |: http://berrange.com -o- http://www.flickr.com/photos/dberrange/ :| |: http://libvirt.org -o- http://virt-manager.org :| |: http://autobuild.org -o- http://search.cpan.org/~danberr/ :| |: http://entangle-photo.org -o- http://live.gnome.org/gtk-vnc :|

--- src/qemu/qemu_monitor.c | 21 +++ src/qemu/qemu_monitor.h | 3 + src/qemu/qemu_monitor_json.c | 162 +++++++++++++++++++++ src/qemu/qemu_monitor_json.h | 6 + tests/Makefile.am | 1 + .../qemumonitorjson-getcpu-empty.data | 2 + .../qemumonitorjson-getcpu-empty.json | 46 ++++++ .../qemumonitorjson-getcpu-filtered.data | 4 + .../qemumonitorjson-getcpu-filtered.json | 46 ++++++ .../qemumonitorjson-getcpu-full.data | 4 + .../qemumonitorjson-getcpu-full.json | 46 ++++++ .../qemumonitorjson-getcpu-host.data | 5 + .../qemumonitorjson-getcpu-host.json | 45 ++++++ tests/qemumonitorjsontest.c | 74 ++++++++++ 14 files changed, 465 insertions(+) create mode 100644 tests/qemumonitorjsondata/qemumonitorjson-getcpu-empty.data create mode 100644 tests/qemumonitorjsondata/qemumonitorjson-getcpu-empty.json create mode 100644 tests/qemumonitorjsondata/qemumonitorjson-getcpu-filtered.data create mode 100644 tests/qemumonitorjsondata/qemumonitorjson-getcpu-filtered.json create mode 100644 tests/qemumonitorjsondata/qemumonitorjson-getcpu-full.data create mode 100644 tests/qemumonitorjsondata/qemumonitorjson-getcpu-full.json create mode 100644 tests/qemumonitorjsondata/qemumonitorjson-getcpu-host.data create mode 100644 tests/qemumonitorjsondata/qemumonitorjson-getcpu-host.json diff --git a/src/qemu/qemu_monitor.c b/src/qemu/qemu_monitor.c index 0b73411..695cf19 100644 --- a/src/qemu/qemu_monitor.c +++ b/src/qemu/qemu_monitor.c @@ -3839,3 +3839,24 @@ qemuMonitorGetDeviceAliases(qemuMonitorPtr mon, return qemuMonitorJSONGetDeviceAliases(mon, aliases); } + +virCPUDefPtr +qemuMonitorGetCPU(qemuMonitorPtr mon, + virArch arch) +{ + VIR_DEBUG("mon=%p, arch=%s", mon, virArchToString(arch)); + + if (!mon) { + virReportError(VIR_ERR_INVALID_ARG, "%s", + _("monitor must not be NULL")); + return NULL; + } + + if (!mon->json) { + virReportError(VIR_ERR_OPERATION_UNSUPPORTED, "%s", + _("JSON monitor is required")); + return NULL; + } + + return qemuMonitorJSONGetCPU(mon, arch); +} diff --git a/src/qemu/qemu_monitor.h b/src/qemu/qemu_monitor.h index 4a55501..8a312bd 100644 --- a/src/qemu/qemu_monitor.h +++ b/src/qemu/qemu_monitor.h @@ -717,6 +717,9 @@ int qemuMonitorDetachCharDev(qemuMonitorPtr mon, int qemuMonitorGetDeviceAliases(qemuMonitorPtr mon, char ***aliases); +virCPUDefPtr qemuMonitorGetCPU(qemuMonitorPtr mon, + virArch arch); + /** * When running two dd process and using <> redirection, we need a * shell that will not truncate files. These two strings serve that diff --git a/src/qemu/qemu_monitor_json.c b/src/qemu/qemu_monitor_json.c index 12f7e69..617bfdf 100644 --- a/src/qemu/qemu_monitor_json.c +++ b/src/qemu/qemu_monitor_json.c @@ -42,6 +42,7 @@ #include "virerror.h" #include "virjson.h" #include "virstring.h" +#include "cpu/cpu_x86.h" #ifdef WITH_DTRACE_PROBES # include "libvirt_qemu_probes.h" @@ -49,6 +50,7 @@ #define VIR_FROM_THIS VIR_FROM_QEMU +#define QOM_CPU_PATH "/machine/unattached/device[0]" #define LINE_ENDING "\r\n" @@ -5453,3 +5455,163 @@ cleanup: VIR_FREE(paths); return ret; } + + +static int +qemuMonitorJSONParseCPUFeatureWord(virJSONValuePtr data, + struct cpuX86cpuid *cpuid) +{ + const char *reg; + unsigned long long fun; + unsigned long long features; + + memset(cpuid, 0, sizeof(*cpuid)); + + if (!(reg = virJSONValueObjectGetString(data, "cpuid-register"))) { + virReportError(VIR_ERR_INTERNAL_ERROR, "%s", + _("missing cpuid-register in CPU data")); + return -1; + } + if (virJSONValueObjectGetNumberUlong(data, "cpuid-input-eax", &fun)) { + virReportError(VIR_ERR_INTERNAL_ERROR, "%s", + _("missing or invalid cpuid-input-eax in CPU data")); + return -1; + } + if (virJSONValueObjectGetNumberUlong(data, "features", &features) < 0) { + virReportError(VIR_ERR_INTERNAL_ERROR, "%s", + _("missing or invalid features in CPU data")); + return -1; + } + + cpuid->function = fun; + if (STREQ(reg, "EAX")) { + cpuid->eax = features; + } else if (STREQ(reg, "EBX")) { + cpuid->ebx = features; + } else if (STREQ(reg, "ECX")) { + cpuid->ecx = features; + } else if (STREQ(reg, "EDX")) { + cpuid->edx = features; + } else { + virReportError(VIR_ERR_INTERNAL_ERROR, + _("unknown CPU register '%s'"), reg); + return -1; + } + + return 0; +} + +virCPUDataPtr +qemuMonitorJSONGetCPUData(qemuMonitorPtr mon, + const char *property) +{ + virJSONValuePtr cmd; + virJSONValuePtr reply = NULL; + virJSONValuePtr data; + virCPUDataPtr cpuData = NULL; + struct cpuX86Data *x86Data = NULL; + struct cpuX86cpuid cpuid; + size_t i; + int ret; + int n; + + if (!(cmd = qemuMonitorJSONMakeCommand("qom-get", + "s:path", QOM_CPU_PATH, + "s:property", property, + NULL))) + return NULL; + + ret = qemuMonitorJSONCommand(mon, cmd, &reply); + + if (ret == 0) + ret = qemuMonitorJSONCheckError(cmd, reply); + + if (ret < 0) + goto cleanup; + + if (!(data = virJSONValueObjectGet(reply, "return"))) { + virReportError(VIR_ERR_INTERNAL_ERROR, "%s", + _("qom-get reply was missing return data")); + goto cleanup; + } + + if ((n = virJSONValueArraySize(data)) < 0) { + virReportError(VIR_ERR_INTERNAL_ERROR, + _("%s CPU property did not return an array"), + property); + goto cleanup; + } + + if (VIR_ALLOC(x86Data) < 0) + goto cleanup; + + for (i = 0; i < n; i++) { + if (qemuMonitorJSONParseCPUFeatureWord(virJSONValueArrayGet(data, i), + &cpuid) < 0 || + x86DataAddCpuid(x86Data, &cpuid) < 0) + goto cleanup; + } + + cpuData = x86MakeCPUData(VIR_ARCH_X86_64, &x86Data); + +cleanup: + virJSONValueFree(cmd); + virJSONValueFree(reply); + x86DataFree(x86Data); + return cpuData; +} + +static int +qemuMonitorJSONGetCPUVendor(qemuMonitorPtr mon, + virCPUDataPtr data) +{ + qemuMonitorJSONObjectProperty prop; + int ret; + + prop.type = QEMU_MONITOR_OBJECT_PROPERTY_STRING; + if (qemuMonitorJSONGetObjectProperty(mon, QOM_CPU_PATH, + "vendor", &prop) < 0) + return -1; + + ret = x86DataSetVendor(data->data.x86, prop.val.str); + + VIR_FREE(prop.val.str); + return ret; +} + +virCPUDefPtr +qemuMonitorJSONGetCPU(qemuMonitorPtr mon, + virArch arch) +{ + virCPUDefPtr cpu; + virCPUDataPtr data = NULL; + + if (arch != VIR_ARCH_I686 && arch != VIR_ARCH_X86_64) { + virReportError(VIR_ERR_OPERATION_UNSUPPORTED, + _("cannot get CPU data for arch %s"), + virArchToString(arch)); + return NULL; + } + + if (VIR_ALLOC(cpu) < 0) + return NULL; + + cpu->arch = arch; + cpu->type = VIR_CPU_TYPE_HOST; + + if (!(data = qemuMonitorJSONGetCPUData(mon, "feature-words")) || + qemuMonitorJSONGetCPUVendor(mon, data) < 0) + goto error; + + if (cpuDecode(cpu, data, NULL, 0, NULL) < 0) + goto error; + +cleanup: + cpuDataFree(data); + return cpu; + +error: + virCPUDefFree(cpu); + cpu = NULL; + goto cleanup; +} diff --git a/src/qemu/qemu_monitor_json.h b/src/qemu/qemu_monitor_json.h index 51cf19c..ffc8d68 100644 --- a/src/qemu/qemu_monitor_json.h +++ b/src/qemu/qemu_monitor_json.h @@ -29,6 +29,7 @@ # include "qemu_monitor.h" # include "virbitmap.h" +# include "cpu/cpu.h" int qemuMonitorJSONIOProcess(qemuMonitorPtr mon, const char *data, @@ -426,4 +427,9 @@ int qemuMonitorJSONDetachCharDev(qemuMonitorPtr mon, int qemuMonitorJSONGetDeviceAliases(qemuMonitorPtr mon, char ***aliases); +virCPUDataPtr qemuMonitorJSONGetCPUData(qemuMonitorPtr mon, + const char *property); +virCPUDefPtr qemuMonitorJSONGetCPU(qemuMonitorPtr mon, + virArch arch); + #endif /* QEMU_MONITOR_JSON_H */ diff --git a/tests/Makefile.am b/tests/Makefile.am index 5ea806e..8343ce1 100644 --- a/tests/Makefile.am +++ b/tests/Makefile.am @@ -81,6 +81,7 @@ EXTRA_DIST = \ oomtrace.pl \ qemuhelpdata \ qemuhotplugtestdata \ + qemumonitorjsondata \ qemuxml2argvdata \ qemuxml2xmloutdata \ qemuxmlnsdata \ diff --git a/tests/qemumonitorjsondata/qemumonitorjson-getcpu-empty.data b/tests/qemumonitorjsondata/qemumonitorjson-getcpu-empty.data new file mode 100644 index 0000000..2b3c111 --- /dev/null +++ b/tests/qemumonitorjsondata/qemumonitorjson-getcpu-empty.data @@ -0,0 +1,2 @@ +<cpudata arch='x86'> +</cpudata> diff --git a/tests/qemumonitorjsondata/qemumonitorjson-getcpu-empty.json b/tests/qemumonitorjsondata/qemumonitorjson-getcpu-empty.json new file mode 100644 index 0000000..36db555 --- /dev/null +++ b/tests/qemumonitorjsondata/qemumonitorjson-getcpu-empty.json @@ -0,0 +1,46 @@ +{ + "return": [ + { + "cpuid-register": "EDX", + "cpuid-input-eax": 2147483658, + "features": 0 + }, + { + "cpuid-register": "EAX", + "cpuid-input-eax": 1073741825, + "features": 0 + }, + { + "cpuid-register": "EDX", + "cpuid-input-eax": 3221225473, + "features": 0 + }, + { + "cpuid-register": "ECX", + "cpuid-input-eax": 2147483649, + "features": 0 + }, + { + "cpuid-register": "EDX", + "cpuid-input-eax": 2147483649, + "features": 0 + }, + { + "cpuid-register": "EBX", + "cpuid-input-ecx": 0, + "cpuid-input-eax": 7, + "features": 0 + }, + { + "cpuid-register": "ECX", + "cpuid-input-eax": 1, + "features": 0 + }, + { + "cpuid-register": "EDX", + "cpuid-input-eax": 1, + "features": 0 + } + ], + "id": "libvirt-6" +} diff --git a/tests/qemumonitorjsondata/qemumonitorjson-getcpu-filtered.data b/tests/qemumonitorjsondata/qemumonitorjson-getcpu-filtered.data new file mode 100644 index 0000000..57431e2 --- /dev/null +++ b/tests/qemumonitorjsondata/qemumonitorjson-getcpu-filtered.data @@ -0,0 +1,4 @@ +<cpudata arch='x86'> + <cpuid function='0x00000001' eax='0x00000000' ebx='0x00000000' ecx='0x00401000' edx='0x00000000'/> + <cpuid function='0x00000007' eax='0x00000000' ebx='0x00000fb9' ecx='0x00000000' edx='0x00000000'/> +</cpudata> diff --git a/tests/qemumonitorjsondata/qemumonitorjson-getcpu-filtered.json b/tests/qemumonitorjsondata/qemumonitorjson-getcpu-filtered.json new file mode 100644 index 0000000..13352f9 --- /dev/null +++ b/tests/qemumonitorjsondata/qemumonitorjson-getcpu-filtered.json @@ -0,0 +1,46 @@ +{ + "return": [ + { + "cpuid-register": "EDX", + "cpuid-input-eax": 2147483658, + "features": 0 + }, + { + "cpuid-register": "EAX", + "cpuid-input-eax": 1073741825, + "features": 0 + }, + { + "cpuid-register": "EDX", + "cpuid-input-eax": 3221225473, + "features": 0 + }, + { + "cpuid-register": "ECX", + "cpuid-input-eax": 2147483649, + "features": 0 + }, + { + "cpuid-register": "EDX", + "cpuid-input-eax": 2147483649, + "features": 0 + }, + { + "cpuid-register": "EBX", + "cpuid-input-ecx": 0, + "cpuid-input-eax": 7, + "features": 4025 + }, + { + "cpuid-register": "ECX", + "cpuid-input-eax": 1, + "features": 4198400 + }, + { + "cpuid-register": "EDX", + "cpuid-input-eax": 1, + "features": 0 + } + ], + "id": "libvirt-7" +} diff --git a/tests/qemumonitorjsondata/qemumonitorjson-getcpu-full.data b/tests/qemumonitorjsondata/qemumonitorjson-getcpu-full.data new file mode 100644 index 0000000..6e3af3c --- /dev/null +++ b/tests/qemumonitorjsondata/qemumonitorjson-getcpu-full.data @@ -0,0 +1,4 @@ +<cpudata arch='x86'> + <cpuid function='0x00000001' eax='0x00000000' ebx='0x00000000' ecx='0x97ba2223' edx='0x078bfbfd'/> + <cpuid function='0x80000001' eax='0x00000000' ebx='0x00000000' ecx='0x00000001' edx='0x28100800'/> +</cpudata> diff --git a/tests/qemumonitorjsondata/qemumonitorjson-getcpu-full.json b/tests/qemumonitorjsondata/qemumonitorjson-getcpu-full.json new file mode 100644 index 0000000..29c00b4 --- /dev/null +++ b/tests/qemumonitorjsondata/qemumonitorjson-getcpu-full.json @@ -0,0 +1,46 @@ +{ + "return": [ + { + "cpuid-register": "EDX", + "cpuid-input-eax": 2147483658, + "features": 0 + }, + { + "cpuid-register": "EAX", + "cpuid-input-eax": 1073741825, + "features": 16777275 + }, + { + "cpuid-register": "EDX", + "cpuid-input-eax": 3221225473, + "features": 0 + }, + { + "cpuid-register": "ECX", + "cpuid-input-eax": 2147483649, + "features": 1 + }, + { + "cpuid-register": "EDX", + "cpuid-input-eax": 2147483649, + "features": 672139264 + }, + { + "cpuid-register": "EBX", + "cpuid-input-ecx": 0, + "cpuid-input-eax": 7, + "features": 0 + }, + { + "cpuid-register": "ECX", + "cpuid-input-eax": 1, + "features": 2545558051 + }, + { + "cpuid-register": "EDX", + "cpuid-input-eax": 1, + "features": 126614525 + } + ], + "id": "libvirt-6" +} diff --git a/tests/qemumonitorjsondata/qemumonitorjson-getcpu-host.data b/tests/qemumonitorjsondata/qemumonitorjson-getcpu-host.data new file mode 100644 index 0000000..bc3087d --- /dev/null +++ b/tests/qemumonitorjsondata/qemumonitorjson-getcpu-host.data @@ -0,0 +1,5 @@ +<cpudata arch='x86'> + <cpuid function='0x00000001' eax='0x00000000' ebx='0x00000000' ecx='0x97ba2223' edx='0x0f8bfbff'/> + <cpuid function='0x00000007' eax='0x00000000' ebx='0x00000002' ecx='0x00000000' edx='0x00000000'/> + <cpuid function='0x80000001' eax='0x00000000' ebx='0x00000000' ecx='0x00000001' edx='0x2993fbff'/> +</cpudata> diff --git a/tests/qemumonitorjsondata/qemumonitorjson-getcpu-host.json b/tests/qemumonitorjsondata/qemumonitorjson-getcpu-host.json new file mode 100644 index 0000000..b5fb9f3 --- /dev/null +++ b/tests/qemumonitorjsondata/qemumonitorjson-getcpu-host.json @@ -0,0 +1,45 @@ +{ + "return": [ + { + "cpuid-register": "EDX", + "cpuid-input-eax": 2147483658, + "features": 0 + }, + { + "cpuid-register": "EAX", + "cpuid-input-eax": 1073741825, + "features": 16777339 + }, + { + "cpuid-register": "EDX", + "cpuid-input-eax": 3221225473, + "features": 0 + }, + { + "cpuid-register": "ECX", + "cpuid-input-eax": 2147483649, + "features": 1 + }, + { + "cpuid-register": "EDX", + "cpuid-input-eax": 2147483649, + "features": 697564159 + }, + { + "cpuid-register": "EBX", + "cpuid-input-ecx": 0, + "cpuid-input-eax": 7, + "features": 2 + }, + { + "cpuid-register": "ECX", + "cpuid-input-eax": 1, + "features": 2545558051 + }, + { + "cpuid-register": "EDX", + "cpuid-input-eax": 1, + "features": 260832255 + } + ] +} diff --git a/tests/qemumonitorjsontest.c b/tests/qemumonitorjsontest.c index 9e66059..aec16bb 100644 --- a/tests/qemumonitorjsontest.c +++ b/tests/qemumonitorjsontest.c @@ -27,6 +27,7 @@ #include "virthread.h" #include "virerror.h" #include "virstring.h" +#include "cpu/cpu.h" #define VIR_FROM_THIS VIR_FROM_NONE @@ -943,6 +944,66 @@ cleanup: } +struct testCPUData { + const char *name; + const char *property; + const virDomainXMLOptionPtr xmlopt; +}; + +static int +testQemuMonitorJSONGetCPUData(const void *opaque) +{ + const struct testCPUData *data = opaque; + qemuMonitorTestPtr test = qemuMonitorTestNew(true, data->xmlopt); + virCPUDataPtr cpuData = NULL; + char *jsonFile = NULL; + char *dataFile = NULL; + char *jsonStr = NULL; + char *expected = NULL; + char *actual = NULL; + int ret = -1; + + if (!test) + return -1; + + if (virAsprintf(&jsonFile, + "%s/qemumonitorjsondata/qemumonitorjson-getcpu-%s.json", + abs_srcdir, data->name) < 0 || + virAsprintf(&dataFile, + "%s/qemumonitorjsondata/qemumonitorjson-getcpu-%s.data", + abs_srcdir, data->name) < 0) + goto cleanup; + + if (virtTestLoadFile(jsonFile, &jsonStr) < 0 || + virtTestLoadFile(dataFile, &expected) < 0) + goto cleanup; + + if (qemuMonitorTestAddItem(test, "qom-get", jsonStr) < 0) + goto cleanup; + + cpuData = qemuMonitorJSONGetCPUData(qemuMonitorTestGetMonitor(test), + data->property); + if (!cpuData || !(actual = cpuDataFormat(cpuData))) + goto cleanup; + + if (STRNEQ(expected, actual)) { + virtTestDifference(stderr, expected, actual); + goto cleanup; + } + + ret = 0; +cleanup: + VIR_FREE(jsonFile); + VIR_FREE(dataFile); + VIR_FREE(jsonStr); + VIR_FREE(expected); + VIR_FREE(actual); + cpuDataFree(cpuData); + qemuMonitorTestFree(test); + return ret; +} + + static int mymain(void) { @@ -964,6 +1025,14 @@ mymain(void) if (virtTestRun(# name, 1, testQemuMonitorJSON ## name, xmlopt) < 0) \ ret = -1 +#define DO_TEST_CPU_DATA(name, property) \ + do { \ + struct testCPUData data = { name, property, xmlopt }; \ + const char *label = "GetCPU(" property ", " name ")"; \ + if (virtTestRun(label, 1, testQemuMonitorJSONGetCPUData, &data) < 0)\ + ret = -1; \ + } while (0) + DO_TEST(GetStatus); DO_TEST(GetVersion); DO_TEST(GetMachines); @@ -978,6 +1047,11 @@ mymain(void) DO_TEST(SetObjectProperty); DO_TEST(GetDeviceAliases); + DO_TEST_CPU_DATA("host", "feature-words"); + DO_TEST_CPU_DATA("full", "feature-words"); + DO_TEST_CPU_DATA("filtered", "filtered-features"); + DO_TEST_CPU_DATA("empty", "filtered-features"); + virObjectUnref(xmlopt); return (ret == 0) ? EXIT_SUCCESS : EXIT_FAILURE; -- 1.8.3.2

On Tue, Jul 23, 2013 at 06:11:33PM +0200, Jiri Denemark wrote:
--- src/qemu/qemu_monitor.c | 21 +++ src/qemu/qemu_monitor.h | 3 + src/qemu/qemu_monitor_json.c | 162 +++++++++++++++++++++ src/qemu/qemu_monitor_json.h | 6 + tests/Makefile.am | 1 + .../qemumonitorjson-getcpu-empty.data | 2 + .../qemumonitorjson-getcpu-empty.json | 46 ++++++ .../qemumonitorjson-getcpu-filtered.data | 4 + .../qemumonitorjson-getcpu-filtered.json | 46 ++++++ .../qemumonitorjson-getcpu-full.data | 4 + .../qemumonitorjson-getcpu-full.json | 46 ++++++ .../qemumonitorjson-getcpu-host.data | 5 + .../qemumonitorjson-getcpu-host.json | 45 ++++++ tests/qemumonitorjsontest.c | 74 ++++++++++ 14 files changed, 465 insertions(+) create mode 100644 tests/qemumonitorjsondata/qemumonitorjson-getcpu-empty.data create mode 100644 tests/qemumonitorjsondata/qemumonitorjson-getcpu-empty.json create mode 100644 tests/qemumonitorjsondata/qemumonitorjson-getcpu-filtered.data create mode 100644 tests/qemumonitorjsondata/qemumonitorjson-getcpu-filtered.json create mode 100644 tests/qemumonitorjsondata/qemumonitorjson-getcpu-full.data create mode 100644 tests/qemumonitorjsondata/qemumonitorjson-getcpu-full.json create mode 100644 tests/qemumonitorjsondata/qemumonitorjson-getcpu-host.data create mode 100644 tests/qemumonitorjsondata/qemumonitorjson-getcpu-host.json
ACK, though I believe the design of this monitor API is flawed because it requires you to re-launch QEMU with different accel args Daniel -- |: http://berrange.com -o- http://www.flickr.com/photos/dberrange/ :| |: http://libvirt.org -o- http://virt-manager.org :| |: http://autobuild.org -o- http://search.cpan.org/~danberr/ :| |: http://entangle-photo.org -o- http://live.gnome.org/gtk-vnc :|

On Tue, Jul 23, 2013 at 17:32:42 +0100, Daniel Berrange wrote:
On Tue, Jul 23, 2013 at 06:11:33PM +0200, Jiri Denemark wrote:
--- src/qemu/qemu_monitor.c | 21 +++ src/qemu/qemu_monitor.h | 3 + src/qemu/qemu_monitor_json.c | 162 +++++++++++++++++++++ src/qemu/qemu_monitor_json.h | 6 + tests/Makefile.am | 1 + .../qemumonitorjson-getcpu-empty.data | 2 + .../qemumonitorjson-getcpu-empty.json | 46 ++++++ .../qemumonitorjson-getcpu-filtered.data | 4 + .../qemumonitorjson-getcpu-filtered.json | 46 ++++++ .../qemumonitorjson-getcpu-full.data | 4 + .../qemumonitorjson-getcpu-full.json | 46 ++++++ .../qemumonitorjson-getcpu-host.data | 5 + .../qemumonitorjson-getcpu-host.json | 45 ++++++ tests/qemumonitorjsontest.c | 74 ++++++++++ 14 files changed, 465 insertions(+) create mode 100644 tests/qemumonitorjsondata/qemumonitorjson-getcpu-empty.data create mode 100644 tests/qemumonitorjsondata/qemumonitorjson-getcpu-empty.json create mode 100644 tests/qemumonitorjsondata/qemumonitorjson-getcpu-filtered.data create mode 100644 tests/qemumonitorjsondata/qemumonitorjson-getcpu-filtered.json create mode 100644 tests/qemumonitorjsondata/qemumonitorjson-getcpu-full.data create mode 100644 tests/qemumonitorjsondata/qemumonitorjson-getcpu-full.json create mode 100644 tests/qemumonitorjsondata/qemumonitorjson-getcpu-host.data create mode 100644 tests/qemumonitorjsondata/qemumonitorjson-getcpu-host.json
ACK, though I believe the design of this monitor API is flawed because it requires you to re-launch QEMU with different accel args
Not really, this can be used in tcg too. It's just when we want to get the data for "host" CPU, we need to enable kvm as tcg knows nothing about that CPU. Which makes sense as kvm (the kernel module) influences how the "host" CPU will look like. Jirka

On Tue, Jul 23, 2013 at 19:28:38 +0200, Jiri Denemark wrote:
On Tue, Jul 23, 2013 at 17:32:42 +0100, Daniel Berrange wrote:
On Tue, Jul 23, 2013 at 06:11:33PM +0200, Jiri Denemark wrote:
--- src/qemu/qemu_monitor.c | 21 +++ src/qemu/qemu_monitor.h | 3 + src/qemu/qemu_monitor_json.c | 162 +++++++++++++++++++++ src/qemu/qemu_monitor_json.h | 6 + tests/Makefile.am | 1 + .../qemumonitorjson-getcpu-empty.data | 2 + .../qemumonitorjson-getcpu-empty.json | 46 ++++++ .../qemumonitorjson-getcpu-filtered.data | 4 + .../qemumonitorjson-getcpu-filtered.json | 46 ++++++ .../qemumonitorjson-getcpu-full.data | 4 + .../qemumonitorjson-getcpu-full.json | 46 ++++++ .../qemumonitorjson-getcpu-host.data | 5 + .../qemumonitorjson-getcpu-host.json | 45 ++++++ tests/qemumonitorjsontest.c | 74 ++++++++++ 14 files changed, 465 insertions(+) create mode 100644 tests/qemumonitorjsondata/qemumonitorjson-getcpu-empty.data create mode 100644 tests/qemumonitorjsondata/qemumonitorjson-getcpu-empty.json create mode 100644 tests/qemumonitorjsondata/qemumonitorjson-getcpu-filtered.data create mode 100644 tests/qemumonitorjsondata/qemumonitorjson-getcpu-filtered.json create mode 100644 tests/qemumonitorjsondata/qemumonitorjson-getcpu-full.data create mode 100644 tests/qemumonitorjsondata/qemumonitorjson-getcpu-full.json create mode 100644 tests/qemumonitorjsondata/qemumonitorjson-getcpu-host.data create mode 100644 tests/qemumonitorjsondata/qemumonitorjson-getcpu-host.json
ACK, though I believe the design of this monitor API is flawed because it requires you to re-launch QEMU with different accel args
Not really, this can be used in tcg too. It's just when we want to get the data for "host" CPU, we need to enable kvm as tcg knows nothing about that CPU. Which makes sense as kvm (the kernel module) influences how the "host" CPU will look like.
However, you need to have a CPU to be able to ask for his properties (which kinda makes sense too) and for that you also need a machine with type != none. Which makes sense too, as the CPU may differ depending on machine type (which, however, does not happen for "host" CPU). Jirka

On Tue, Jul 23, 2013 at 07:32:46PM +0200, Jiri Denemark wrote:
On Tue, Jul 23, 2013 at 19:28:38 +0200, Jiri Denemark wrote:
On Tue, Jul 23, 2013 at 17:32:42 +0100, Daniel Berrange wrote:
On Tue, Jul 23, 2013 at 06:11:33PM +0200, Jiri Denemark wrote:
--- src/qemu/qemu_monitor.c | 21 +++ src/qemu/qemu_monitor.h | 3 + src/qemu/qemu_monitor_json.c | 162 +++++++++++++++++++++ src/qemu/qemu_monitor_json.h | 6 + tests/Makefile.am | 1 + .../qemumonitorjson-getcpu-empty.data | 2 + .../qemumonitorjson-getcpu-empty.json | 46 ++++++ .../qemumonitorjson-getcpu-filtered.data | 4 + .../qemumonitorjson-getcpu-filtered.json | 46 ++++++ .../qemumonitorjson-getcpu-full.data | 4 + .../qemumonitorjson-getcpu-full.json | 46 ++++++ .../qemumonitorjson-getcpu-host.data | 5 + .../qemumonitorjson-getcpu-host.json | 45 ++++++ tests/qemumonitorjsontest.c | 74 ++++++++++ 14 files changed, 465 insertions(+) create mode 100644 tests/qemumonitorjsondata/qemumonitorjson-getcpu-empty.data create mode 100644 tests/qemumonitorjsondata/qemumonitorjson-getcpu-empty.json create mode 100644 tests/qemumonitorjsondata/qemumonitorjson-getcpu-filtered.data create mode 100644 tests/qemumonitorjsondata/qemumonitorjson-getcpu-filtered.json create mode 100644 tests/qemumonitorjsondata/qemumonitorjson-getcpu-full.data create mode 100644 tests/qemumonitorjsondata/qemumonitorjson-getcpu-full.json create mode 100644 tests/qemumonitorjsondata/qemumonitorjson-getcpu-host.data create mode 100644 tests/qemumonitorjsondata/qemumonitorjson-getcpu-host.json
ACK, though I believe the design of this monitor API is flawed because it requires you to re-launch QEMU with different accel args
Not really, this can be used in tcg too. It's just when we want to get the data for "host" CPU, we need to enable kvm as tcg knows nothing about that CPU. Which makes sense as kvm (the kernel module) influences how the "host" CPU will look like.
However, you need to have a CPU to be able to ask for his properties (which kinda makes sense too) and for that you also need a machine with type != none. Which makes sense too, as the CPU may differ depending on machine type (which, however, does not happen for "host" CPU).
In addition to the "-cpu host" KVM initialization problem, this is an additional problem with the current interfaces provided by QEMU: 1) libvirt needs to query data that depend on chosen machine-type and CPU model 2) Some machine-type behavior is code and not introspectable data * Luckily most of the data we need in this case should/will be encoded in the compat_props tables. * In either case, we don't have an API to query for machine-type compat_props information yet. 3) CPU model behavior will be modelled as CPU class behavior. Like on the machine-type case, some of the CPU-model-specific behavior may be modelled as code, and not introspectable data. * However, e may be able to eventually encode most or all of CPU-model-specific behavior simply as different per-CPU-class property defaults. * In either case, we don't have an API for QOM class introspection, yet. But there's something important in this case: the resulting CPUID data for a specific machine-type + CPU-model combination must be always the same, forever. This means libvirt may even use a static table, or cache this information indefinitely. (Note that I am not talking about "-cpu host", here, but about all the other CPU models) -- Eduardo

Am 24.07.2013 20:25, schrieb Eduardo Habkost:
In addition to the "-cpu host" KVM initialization problem, this is an additional problem with the current interfaces provided by QEMU:
1) libvirt needs to query data that depend on chosen machine-type and CPU model 2) Some machine-type behavior is code and not introspectable data * Luckily most of the data we need in this case should/will be encoded in the compat_props tables. * In either case, we don't have an API to query for machine-type compat_props information yet.
... and I don't think we should add such a thing. It is an internal implementation detail, whose results should be inspected instead.
3) CPU model behavior will be modelled as CPU class behavior. Like on the machine-type case, some of the CPU-model-specific behavior may be modelled as code, and not introspectable data. * However, e may be able to eventually encode most or all of CPU-model-specific behavior simply as different per-CPU-class property defaults. * In either case, we don't have an API for QOM class introspection, yet.
If I understood Anthony correctly on the previous call then that is by design. We have a query-cpu-definitions QMP API to obtain CPU models though. And qom-list-types to discover QOM types. The CPU can then be instantiated via -cpu (the type via -device/-object on other targets), inspected with qom-list/qom-get API and modified with qom-set. The problem with the latter is that devices/CPUs get realized in code rather than shortly before emulation/virtualization starts - that's what my recursive QOM realization series prepared to address, but Paolo veto'ed that and hasn't provided sufficient feedback on what exactly his concerns are founded on and what he proposes instead. In particular: Would walking the qdev bus tree instead of the QOM composition tree address the concerns? Depending on what libvirt is actually trying to do, the above combined with Igor's feature properties and -S might do the job. For x86 the CPUs are easily locatable in the QOM tree via ICC. Regards, Andreas
But there's something important in this case: the resulting CPUID data for a specific machine-type + CPU-model combination must be always the same, forever. This means libvirt may even use a static table, or cache this information indefinitely.
(Note that I am not talking about "-cpu host", here, but about all the other CPU models)
-- SUSE LINUX Products GmbH, Maxfeldstr. 5, 90409 Nürnberg, Germany GF: Jeff Hawn, Jennifer Guild, Felix Imendörffer; HRB 16746 AG Nürnberg

On Thu, Jul 25, 2013 at 11:14:16AM +0200, Andreas Färber wrote:
Am 24.07.2013 20:25, schrieb Eduardo Habkost:
In addition to the "-cpu host" KVM initialization problem, this is an additional problem with the current interfaces provided by QEMU:
1) libvirt needs to query data that depend on chosen machine-type and CPU model 2) Some machine-type behavior is code and not introspectable data * Luckily most of the data we need in this case should/will be encoded in the compat_props tables. * In either case, we don't have an API to query for machine-type compat_props information yet.
... and I don't think we should add such a thing. It is an internal implementation detail, whose results should be inspected instead.
3) CPU model behavior will be modelled as CPU class behavior. Like on the machine-type case, some of the CPU-model-specific behavior may be modelled as code, and not introspectable data. * However, e may be able to eventually encode most or all of CPU-model-specific behavior simply as different per-CPU-class property defaults. * In either case, we don't have an API for QOM class introspection, yet.
If I understood Anthony correctly on the previous call then that is by design. We have a query-cpu-definitions QMP API to obtain CPU models though. And qom-list-types to discover QOM types. The CPU can then be instantiated via -cpu (the type via -device/-object on other targets), inspected with qom-list/qom-get API and modified with qom-set.
The problem with the latter is that devices/CPUs get realized in code rather than shortly before emulation/virtualization starts - that's what my recursive QOM realization series prepared to address, but Paolo veto'ed that and hasn't provided sufficient feedback on what exactly his concerns are founded on and what he proposes instead. In particular: Would walking the qdev bus tree instead of the QOM composition tree address the concerns?
Depending on what libvirt is actually trying to do, the above combined with Igor's feature properties and -S might do the job. For x86 the CPUs are easily locatable in the QOM tree via ICC.
I still don't see how the above would solve the bigger problem: libvirt needs a way to find out how exactly "-machine foo-1.0 -cpu bar" looks different from "-machine foo-1.1 -cpu bar", but don't want to execute QEMU multiple times just to find that out. -- Eduardo

Am 25.07.2013 16:00, schrieb Eduardo Habkost:
libvirt needs a way to find out how exactly "-machine foo-1.0 -cpu bar" looks different from "-machine foo-1.1 -cpu bar",
Why? (What's the actual use case?) Andreas -- SUSE LINUX Products GmbH, Maxfeldstr. 5, 90409 Nürnberg, Germany GF: Jeff Hawn, Jennifer Guild, Felix Imendörffer; HRB 16746 AG Nürnberg

On Thu, Jul 25, 2013 at 04:09:18PM +0200, Andreas Färber wrote:
Am 25.07.2013 16:00, schrieb Eduardo Habkost:
libvirt needs a way to find out how exactly "-machine foo-1.0 -cpu bar" looks different from "-machine foo-1.1 -cpu bar",
Why? (What's the actual use case?)
It already takes a long time to just probe each QEMU binary, without expanding that to probe each binary once for each machine type they include. The idea of 'qemu -M none' was that we'd have a machine type which did not do any hardware set, to query any aspect of the QEMU binary capabilities. If we have to also invoke qemu again with every other machine type that is a failed design IMHO. It should not be beyond the realm of possibility to make 'qemu -M none' provide information about the CPU features for machines/cpus without needing to actually creating instances of those machines. Daniel -- |: http://berrange.com -o- http://www.flickr.com/photos/dberrange/ :| |: http://libvirt.org -o- http://virt-manager.org :| |: http://autobuild.org -o- http://search.cpan.org/~danberr/ :| |: http://entangle-photo.org -o- http://live.gnome.org/gtk-vnc :|

On Thu, Jul 25, 2013 at 04:09:18PM +0200, Andreas Färber wrote:
Am 25.07.2013 16:00, schrieb Eduardo Habkost:
libvirt needs a way to find out how exactly "-machine foo-1.0 -cpu bar" looks different from "-machine foo-1.1 -cpu bar",
Why? (What's the actual use case?)
libvirt API allows individual CPU features to be configured, so libvirt needs to know what exactly will be the result of using a machine-type/CPU-model combination to make sure it will be exactly what was requested: http://libvirt.org/formatdomain.html#elementsCPU Also, libvirt needs to be able to check if migration to a host is possible (i.e. if all features enabled by a machine-type/CPU-model combination are supported by the host) before actually starting the migration process. -- Eduardo

Am 25.07.2013 20:02, schrieb Eduardo Habkost:
On Thu, Jul 25, 2013 at 04:09:18PM +0200, Andreas Färber wrote:
Am 25.07.2013 16:00, schrieb Eduardo Habkost:
libvirt needs a way to find out how exactly "-machine foo-1.0 -cpu bar" looks different from "-machine foo-1.1 -cpu bar",
Why? (What's the actual use case?)
libvirt API allows individual CPU features to be configured, so libvirt needs to know what exactly will be the result of using a machine-type/CPU-model combination to make sure it will be exactly what was requested: http://libvirt.org/formatdomain.html#elementsCPU
That's exactly what you added properties for last minute in v1.5! libvirt instantiates qemu-system-x86_64 -cpu foo,+x,+y and then checks that it got what it wanted - if not, die, otherwise continue with virtualization. One process.
Also, libvirt needs to be able to check if migration to a host is possible (i.e. if all features enabled by a machine-type/CPU-model combination are supported by the host) before actually starting the migration process.
That's one process on the destination with one -machine pc-i440-x.y. Is the problem possibly rather that -incoming and QMP exclude each other? Then we should fix that instead by starting incoming migration from QMP in the same process that we used to check that migration will be possible without guest-visible changes. Andreas -- SUSE LINUX Products GmbH, Maxfeldstr. 5, 90409 Nürnberg, Germany GF: Jeff Hawn, Jennifer Guild, Felix Imendörffer; HRB 16746 AG Nürnberg

On Fri, Jul 26, 2013 at 02:31:24PM +0200, Andreas Färber wrote:
Am 25.07.2013 20:02, schrieb Eduardo Habkost:
On Thu, Jul 25, 2013 at 04:09:18PM +0200, Andreas Färber wrote:
Am 25.07.2013 16:00, schrieb Eduardo Habkost:
libvirt needs a way to find out how exactly "-machine foo-1.0 -cpu bar" looks different from "-machine foo-1.1 -cpu bar",
Why? (What's the actual use case?)
libvirt API allows individual CPU features to be configured, so libvirt needs to know what exactly will be the result of using a machine-type/CPU-model combination to make sure it will be exactly what was requested: http://libvirt.org/formatdomain.html#elementsCPU
That's exactly what you added properties for last minute in v1.5!
libvirt instantiates qemu-system-x86_64 -cpu foo,+x,+y and then checks that it got what it wanted - if not, die, otherwise continue with virtualization. One process.
I believe libvirt needs to know what are the results of the CPU model + machine-type combination at other moments, even before building the QEMU command-line. But I may be mistaken, so I hope the libvirt developers can clarify that.
Also, libvirt needs to be able to check if migration to a host is possible (i.e. if all features enabled by a machine-type/CPU-model combination are supported by the host) before actually starting the migration process.
That's one process on the destination with one -machine pc-i440-x.y. Is the problem possibly rather that -incoming and QMP exclude each other? Then we should fix that instead by starting incoming migration from QMP in the same process that we used to check that migration will be possible without guest-visible changes.
I don't know the answer for that, and I also don't know if it is acceptable for libvirt to be required to execute QEMU just to find out if migration to a host is possible. I am thinking of cases where there may be dozens or hundreds of hosts available, and some management system needs to find out quickly what are the best candidates to which a large set of VMs can be migrated. -- Eduardo

On Wed, Jul 24, 2013 at 03:25:19PM -0300, Eduardo Habkost wrote:
On Tue, Jul 23, 2013 at 07:32:46PM +0200, Jiri Denemark wrote:
On Tue, Jul 23, 2013 at 19:28:38 +0200, Jiri Denemark wrote:
On Tue, Jul 23, 2013 at 17:32:42 +0100, Daniel Berrange wrote:
On Tue, Jul 23, 2013 at 06:11:33PM +0200, Jiri Denemark wrote:
--- src/qemu/qemu_monitor.c | 21 +++ src/qemu/qemu_monitor.h | 3 + src/qemu/qemu_monitor_json.c | 162 +++++++++++++++++++++ src/qemu/qemu_monitor_json.h | 6 + tests/Makefile.am | 1 + .../qemumonitorjson-getcpu-empty.data | 2 + .../qemumonitorjson-getcpu-empty.json | 46 ++++++ .../qemumonitorjson-getcpu-filtered.data | 4 + .../qemumonitorjson-getcpu-filtered.json | 46 ++++++ .../qemumonitorjson-getcpu-full.data | 4 + .../qemumonitorjson-getcpu-full.json | 46 ++++++ .../qemumonitorjson-getcpu-host.data | 5 + .../qemumonitorjson-getcpu-host.json | 45 ++++++ tests/qemumonitorjsontest.c | 74 ++++++++++ 14 files changed, 465 insertions(+) create mode 100644 tests/qemumonitorjsondata/qemumonitorjson-getcpu-empty.data create mode 100644 tests/qemumonitorjsondata/qemumonitorjson-getcpu-empty.json create mode 100644 tests/qemumonitorjsondata/qemumonitorjson-getcpu-filtered.data create mode 100644 tests/qemumonitorjsondata/qemumonitorjson-getcpu-filtered.json create mode 100644 tests/qemumonitorjsondata/qemumonitorjson-getcpu-full.data create mode 100644 tests/qemumonitorjsondata/qemumonitorjson-getcpu-full.json create mode 100644 tests/qemumonitorjsondata/qemumonitorjson-getcpu-host.data create mode 100644 tests/qemumonitorjsondata/qemumonitorjson-getcpu-host.json
ACK, though I believe the design of this monitor API is flawed because it requires you to re-launch QEMU with different accel args
Not really, this can be used in tcg too. It's just when we want to get the data for "host" CPU, we need to enable kvm as tcg knows nothing about that CPU. Which makes sense as kvm (the kernel module) influences how the "host" CPU will look like.
However, you need to have a CPU to be able to ask for his properties (which kinda makes sense too) and for that you also need a machine with type != none. Which makes sense too, as the CPU may differ depending on machine type (which, however, does not happen for "host" CPU).
In addition to the "-cpu host" KVM initialization problem, this is an additional problem with the current interfaces provided by QEMU:
1) libvirt needs to query data that depend on chosen machine-type and CPU model 2) Some machine-type behavior is code and not introspectable data * Luckily most of the data we need in this case should/will be encoded in the compat_props tables. * In either case, we don't have an API to query for machine-type compat_props information yet. 3) CPU model behavior will be modelled as CPU class behavior. Like on the machine-type case, some of the CPU-model-specific behavior may be modelled as code, and not introspectable data. * However, e may be able to eventually encode most or all of CPU-model-specific behavior simply as different per-CPU-class property defaults. * In either case, we don't have an API for QOM class introspection, yet.
But there's something important in this case: the resulting CPUID data for a specific machine-type + CPU-model combination must be always the same, forever. This means libvirt may even use a static table, or cache this information indefinitely.
(Note that I am not talking about "-cpu host", here, but about all the other CPU models)
Hmm, so if the CPU filtering can vary per every single individual machine type, then the approach Jiri started here, of invoking QEMU with machine type set to query the CPU after it was created, is definitely not something we can follow. It is just far too inefficient. I understand that the QEMU code isn't currently structured in a way that lets it easily expose information that varies per machine type, but I don't think we need to solve the entire problem space in a perfectly generic fashion here. Perfect is the enemy of good. If we can get all the CPU feature flag filtering information to be in statically defined data structures, then it seems that it would be pretty straightforward to add a monitor API that takes a CPU model name and machine type name, and returns the list of feature flags, without actually having to initialize the machine type or CPU. It can even just open /dev/kvm & issue the neccessary ioctl, without having to initialize the entire KVM CPU subsystem in QEMU. Daniel -- |: http://berrange.com -o- http://www.flickr.com/photos/dberrange/ :| |: http://libvirt.org -o- http://virt-manager.org :| |: http://autobuild.org -o- http://search.cpan.org/~danberr/ :| |: http://entangle-photo.org -o- http://live.gnome.org/gtk-vnc :|

On Thu, Jul 25, 2013 at 10:45:10AM +0100, Daniel P. Berrange wrote:
On Wed, Jul 24, 2013 at 03:25:19PM -0300, Eduardo Habkost wrote:
On Tue, Jul 23, 2013 at 07:32:46PM +0200, Jiri Denemark wrote:
On Tue, Jul 23, 2013 at 19:28:38 +0200, Jiri Denemark wrote:
On Tue, Jul 23, 2013 at 17:32:42 +0100, Daniel Berrange wrote:
On Tue, Jul 23, 2013 at 06:11:33PM +0200, Jiri Denemark wrote:
--- src/qemu/qemu_monitor.c | 21 +++ src/qemu/qemu_monitor.h | 3 + src/qemu/qemu_monitor_json.c | 162 +++++++++++++++++++++ src/qemu/qemu_monitor_json.h | 6 + tests/Makefile.am | 1 + .../qemumonitorjson-getcpu-empty.data | 2 + .../qemumonitorjson-getcpu-empty.json | 46 ++++++ .../qemumonitorjson-getcpu-filtered.data | 4 + .../qemumonitorjson-getcpu-filtered.json | 46 ++++++ .../qemumonitorjson-getcpu-full.data | 4 + .../qemumonitorjson-getcpu-full.json | 46 ++++++ .../qemumonitorjson-getcpu-host.data | 5 + .../qemumonitorjson-getcpu-host.json | 45 ++++++ tests/qemumonitorjsontest.c | 74 ++++++++++ 14 files changed, 465 insertions(+) create mode 100644 tests/qemumonitorjsondata/qemumonitorjson-getcpu-empty.data create mode 100644 tests/qemumonitorjsondata/qemumonitorjson-getcpu-empty.json create mode 100644 tests/qemumonitorjsondata/qemumonitorjson-getcpu-filtered.data create mode 100644 tests/qemumonitorjsondata/qemumonitorjson-getcpu-filtered.json create mode 100644 tests/qemumonitorjsondata/qemumonitorjson-getcpu-full.data create mode 100644 tests/qemumonitorjsondata/qemumonitorjson-getcpu-full.json create mode 100644 tests/qemumonitorjsondata/qemumonitorjson-getcpu-host.data create mode 100644 tests/qemumonitorjsondata/qemumonitorjson-getcpu-host.json
ACK, though I believe the design of this monitor API is flawed because it requires you to re-launch QEMU with different accel args
Not really, this can be used in tcg too. It's just when we want to get the data for "host" CPU, we need to enable kvm as tcg knows nothing about that CPU. Which makes sense as kvm (the kernel module) influences how the "host" CPU will look like.
However, you need to have a CPU to be able to ask for his properties (which kinda makes sense too) and for that you also need a machine with type != none. Which makes sense too, as the CPU may differ depending on machine type (which, however, does not happen for "host" CPU).
In addition to the "-cpu host" KVM initialization problem, this is an additional problem with the current interfaces provided by QEMU:
1) libvirt needs to query data that depend on chosen machine-type and CPU model 2) Some machine-type behavior is code and not introspectable data * Luckily most of the data we need in this case should/will be encoded in the compat_props tables. * In either case, we don't have an API to query for machine-type compat_props information yet. 3) CPU model behavior will be modelled as CPU class behavior. Like on the machine-type case, some of the CPU-model-specific behavior may be modelled as code, and not introspectable data. * However, e may be able to eventually encode most or all of CPU-model-specific behavior simply as different per-CPU-class property defaults. * In either case, we don't have an API for QOM class introspection, yet.
But there's something important in this case: the resulting CPUID data for a specific machine-type + CPU-model combination must be always the same, forever. This means libvirt may even use a static table, or cache this information indefinitely.
(Note that I am not talking about "-cpu host", here, but about all the other CPU models)
Hmm, so if the CPU filtering can vary per every single individual machine type, then the approach Jiri started here, of invoking QEMU with machine type set to query the CPU after it was created, is definitely not something we can follow. It is just far too inefficient.
I believe there's some confusion here: we are trying to solve two problems: 1) CPU feature filtering (checking which features are available in a given host) 2) CPU model probing (checking what exactly is going to be available when a given CPU model is used, in case nothing is filtered out) Item (1) depends on: host CPU capabilities, host kernel capabilities, QEMU capabilities, presence of some few QEMU command-line options (e.g. kernel irqchip), but shouldn't depend on the machine-type. It depends on /dev/kvm being open. Item (2) depends on the machine-type, but is static and must never change on future QEMU versions (if it changes, it is a QEMU bug). It doesn't depend on opening /dev/kvm. Item (1) can be solved if libvirt does the work itself, by opening /dev/kvm and checking for GET_SUPPORTED_CPUID and checking for QEMU options/capabilities (as long as we document that very carefully). But adding a more specific QMP command that won't require accel=kvm to work may be simpler and better for everybody. Item (2) may be solved today using a static table and/or caching (so libvirt just need to query this information once in a lifetime). It can also be solved partially (without machine-type support) in theory if QEMU let libvirt repeatedly create and destroy CPU objects just to query the resulting feature properties. ...but both problems could be solved very easily using current QEMU interfaces, if libvirt simply executed the QEMU binary more than once. Is "must not run QEMU more than once" a hard requirement? Perfect is the enemy of good. :)
I understand that the QEMU code isn't currently structured in a way that lets it easily expose information that varies per machine type, but I don't think we need to solve the entire problem space in a perfectly generic fashion here. Perfect is the enemy of good.
Right. Also, the more important item (item 1) is not affected by machine-types. Host features change every time you run on a new host/kernel, so probing it precisely is very useful, to detect problems earlier (not just at the last moment before starting a VM). On the other hand, per-machine-type CPU model changes are more rare, and libvirt can still detect unexpected results immediately before the VM is started. (I don't know what libvirt would do in case it detects it, though. Abort? Log a warning?)
If we can get all the CPU feature flag filtering information to be in statically defined data structures, then it seems that it would be pretty straightforward to add a monitor API that takes a CPU model name and machine type name, and returns the list of feature flags, without actually having to initialize the machine type or CPU. It can even just open /dev/kvm & issue the neccessary ioctl, without having to initialize the entire KVM CPU subsystem in QEMU.
The "without actually having to initialize the machine" part may be complicated, but it may be doable. But depending on the direction QEMU machine-types design is going (I don't know if there are plans to eventually make them more QOM-friendly), the solution accepted by QEMU may be different. I will suggest this as a topic for the next KVM call. Are you interested in joining the call? -- Eduardo

On Thu, Jul 25, 2013 at 10:15:56AM -0300, Eduardo Habkost wrote:
On Thu, Jul 25, 2013 at 10:45:10AM +0100, Daniel P. Berrange wrote:
On Wed, Jul 24, 2013 at 03:25:19PM -0300, Eduardo Habkost wrote:
In addition to the "-cpu host" KVM initialization problem, this is an additional problem with the current interfaces provided by QEMU:
1) libvirt needs to query data that depend on chosen machine-type and CPU model 2) Some machine-type behavior is code and not introspectable data * Luckily most of the data we need in this case should/will be encoded in the compat_props tables. * In either case, we don't have an API to query for machine-type compat_props information yet. 3) CPU model behavior will be modelled as CPU class behavior. Like on the machine-type case, some of the CPU-model-specific behavior may be modelled as code, and not introspectable data. * However, e may be able to eventually encode most or all of CPU-model-specific behavior simply as different per-CPU-class property defaults. * In either case, we don't have an API for QOM class introspection, yet.
But there's something important in this case: the resulting CPUID data for a specific machine-type + CPU-model combination must be always the same, forever. This means libvirt may even use a static table, or cache this information indefinitely.
(Note that I am not talking about "-cpu host", here, but about all the other CPU models)
Hmm, so if the CPU filtering can vary per every single individual machine type, then the approach Jiri started here, of invoking QEMU with machine type set to query the CPU after it was created, is definitely not something we can follow. It is just far too inefficient.
I believe there's some confusion here: we are trying to solve two problems:
1) CPU feature filtering (checking which features are available in a given host) 2) CPU model probing (checking what exactly is going to be available when a given CPU model is used, in case nothing is filtered out)
Yep, what Jiri proposed in the original libvirt thread was just a solution to 1). In seeing that though, I was concerned about how it scales up once we have to deal with 2) as well, which I believe is planned future work.
Item (1) depends on: host CPU capabilities, host kernel capabilities, QEMU capabilities, presence of some few QEMU command-line options (e.g. kernel irqchip), but shouldn't depend on the machine-type. It depends on /dev/kvm being open.
Item (2) depends on the machine-type, but is static and must never change on future QEMU versions (if it changes, it is a QEMU bug). It doesn't depend on opening /dev/kvm.
Item (1) can be solved if libvirt does the work itself, by opening /dev/kvm and checking for GET_SUPPORTED_CPUID and checking for QEMU options/capabilities (as long as we document that very carefully). But adding a more specific QMP command that won't require accel=kvm to work may be simpler and better for everybody.
Item (2) may be solved today using a static table and/or caching (so libvirt just need to query this information once in a lifetime). It can also be solved partially (without machine-type support) in theory if QEMU let libvirt repeatedly create and destroy CPU objects just to query the resulting feature properties.
We really don't want to have static tables, since that creates pain in the case where distro vendors create their own custom machine types or CPU models. It would mean libvirt had to record info not only about upstream QEMU, but about every vendor's QEMU builds. Probing the actual binary is the only sensible way here.
...but both problems could be solved very easily using current QEMU interfaces, if libvirt simply executed the QEMU binary more than once. Is "must not run QEMU more than once" a hard requirement? Perfect is the enemy of good. :)
Yes, it is a hard requirement.
I understand that the QEMU code isn't currently structured in a way that lets it easily expose information that varies per machine type, but I don't think we need to solve the entire problem space in a perfectly generic fashion here. Perfect is the enemy of good.
Right. Also, the more important item (item 1) is not affected by machine-types. Host features change every time you run on a new host/kernel, so probing it precisely is very useful, to detect problems earlier (not just at the last moment before starting a VM).
On the other hand, per-machine-type CPU model changes are more rare, and libvirt can still detect unexpected results immediately before the VM is started. (I don't know what libvirt would do in case it detects it, though. Abort? Log a warning?)
We don't want to be running QEMU multiple times during the startup process for a VM, because that adds delays to the startup process. It might not sound like much but adding a few 100ms to probe CPUs by running QEMU is quite significant for apps like libvirt-sandbox and libguesfs where absolute boot time is important. We used to run QEMU at startup to probe things & we just recently got rid of that delay, so I don't want to re-introduce it against. When starting a VM, we only once to start QEMU once, as the actual instance that is going to run the VM.
If we can get all the CPU feature flag filtering information to be in statically defined data structures, then it seems that it would be pretty straightforward to add a monitor API that takes a CPU model name and machine type name, and returns the list of feature flags, without actually having to initialize the machine type or CPU. It can even just open /dev/kvm & issue the neccessary ioctl, without having to initialize the entire KVM CPU subsystem in QEMU.
The "without actually having to initialize the machine" part may be complicated, but it may be doable. But depending on the direction QEMU machine-types design is going (I don't know if there are plans to eventually make them more QOM-friendly), the solution accepted by QEMU may be different.
I will suggest this as a topic for the next KVM call. Are you interested in joining the call?
Yes, assuming it doesn't clash with anything else i have scheduled. Daniel -- |: http://berrange.com -o- http://www.flickr.com/photos/dberrange/ :| |: http://libvirt.org -o- http://virt-manager.org :| |: http://autobuild.org -o- http://search.cpan.org/~danberr/ :| |: http://entangle-photo.org -o- http://live.gnome.org/gtk-vnc :|

On Tue, Jul 23, 2013 at 07:28:38PM +0200, Jiri Denemark wrote:
On Tue, Jul 23, 2013 at 17:32:42 +0100, Daniel Berrange wrote:
On Tue, Jul 23, 2013 at 06:11:33PM +0200, Jiri Denemark wrote:
--- src/qemu/qemu_monitor.c | 21 +++ src/qemu/qemu_monitor.h | 3 + src/qemu/qemu_monitor_json.c | 162 +++++++++++++++++++++ src/qemu/qemu_monitor_json.h | 6 + tests/Makefile.am | 1 + .../qemumonitorjson-getcpu-empty.data | 2 + .../qemumonitorjson-getcpu-empty.json | 46 ++++++ .../qemumonitorjson-getcpu-filtered.data | 4 + .../qemumonitorjson-getcpu-filtered.json | 46 ++++++ .../qemumonitorjson-getcpu-full.data | 4 + .../qemumonitorjson-getcpu-full.json | 46 ++++++ .../qemumonitorjson-getcpu-host.data | 5 + .../qemumonitorjson-getcpu-host.json | 45 ++++++ tests/qemumonitorjsontest.c | 74 ++++++++++ 14 files changed, 465 insertions(+) create mode 100644 tests/qemumonitorjsondata/qemumonitorjson-getcpu-empty.data create mode 100644 tests/qemumonitorjsondata/qemumonitorjson-getcpu-empty.json create mode 100644 tests/qemumonitorjsondata/qemumonitorjson-getcpu-filtered.data create mode 100644 tests/qemumonitorjsondata/qemumonitorjson-getcpu-filtered.json create mode 100644 tests/qemumonitorjsondata/qemumonitorjson-getcpu-full.data create mode 100644 tests/qemumonitorjsondata/qemumonitorjson-getcpu-full.json create mode 100644 tests/qemumonitorjsondata/qemumonitorjson-getcpu-host.data create mode 100644 tests/qemumonitorjsondata/qemumonitorjson-getcpu-host.json
ACK, though I believe the design of this monitor API is flawed because it requires you to re-launch QEMU with different accel args
Not really, this can be used in tcg too. It's just when we want to get the data for "host" CPU, we need to enable kvm as tcg knows nothing about that CPU. Which makes sense as kvm (the kernel module) influences how the "host" CPU will look like.
Is there no ioctl() for the KVM module we can just invoke directly to discover what CPU flag filtering it will perform. Presumably QEMU is using some ioctl to discover this, so libvirt ought to be able to too. Daniel -- |: http://berrange.com -o- http://www.flickr.com/photos/dberrange/ :| |: http://libvirt.org -o- http://virt-manager.org :| |: http://autobuild.org -o- http://search.cpan.org/~danberr/ :| |: http://entangle-photo.org -o- http://live.gnome.org/gtk-vnc :|

On Wed, Jul 24, 2013 at 11:03:03AM +0100, Daniel P. Berrange wrote:
On Tue, Jul 23, 2013 at 07:28:38PM +0200, Jiri Denemark wrote:
On Tue, Jul 23, 2013 at 17:32:42 +0100, Daniel Berrange wrote:
On Tue, Jul 23, 2013 at 06:11:33PM +0200, Jiri Denemark wrote:
--- src/qemu/qemu_monitor.c | 21 +++ src/qemu/qemu_monitor.h | 3 + src/qemu/qemu_monitor_json.c | 162 +++++++++++++++++++++ src/qemu/qemu_monitor_json.h | 6 + tests/Makefile.am | 1 + .../qemumonitorjson-getcpu-empty.data | 2 + .../qemumonitorjson-getcpu-empty.json | 46 ++++++ .../qemumonitorjson-getcpu-filtered.data | 4 + .../qemumonitorjson-getcpu-filtered.json | 46 ++++++ .../qemumonitorjson-getcpu-full.data | 4 + .../qemumonitorjson-getcpu-full.json | 46 ++++++ .../qemumonitorjson-getcpu-host.data | 5 + .../qemumonitorjson-getcpu-host.json | 45 ++++++ tests/qemumonitorjsontest.c | 74 ++++++++++ 14 files changed, 465 insertions(+) create mode 100644 tests/qemumonitorjsondata/qemumonitorjson-getcpu-empty.data create mode 100644 tests/qemumonitorjsondata/qemumonitorjson-getcpu-empty.json create mode 100644 tests/qemumonitorjsondata/qemumonitorjson-getcpu-filtered.data create mode 100644 tests/qemumonitorjsondata/qemumonitorjson-getcpu-filtered.json create mode 100644 tests/qemumonitorjsondata/qemumonitorjson-getcpu-full.data create mode 100644 tests/qemumonitorjsondata/qemumonitorjson-getcpu-full.json create mode 100644 tests/qemumonitorjsondata/qemumonitorjson-getcpu-host.data create mode 100644 tests/qemumonitorjsondata/qemumonitorjson-getcpu-host.json
ACK, though I believe the design of this monitor API is flawed because it requires you to re-launch QEMU with different accel args
Not really, this can be used in tcg too. It's just when we want to get the data for "host" CPU, we need to enable kvm as tcg knows nothing about that CPU. Which makes sense as kvm (the kernel module) influences how the "host" CPU will look like.
Is there no ioctl() for the KVM module we can just invoke directly to discover what CPU flag filtering it will perform. Presumably QEMU is using some ioctl to discover this, so libvirt ought to be able to too.
Yes, there is: GET_SUPPORTED_CPUID. But availability of some features may depend on QEMU capabilities as well. On those cases libvirt may need to combine the GET_SUPPORTED_CPUID results with what it knows about QEMU capabilities. But this should work as long as we report and document QEMU capabilities/options that affect CPU features very clearly. That may be an appropriate way to go to, if you don't mind having low-level KVM ioctl() code inside libvirt, that duplicates some QEMU logic. (But we still have the problem of querying/reporting CPU feature details that depend on machine-type+CPU-model [that is not addressed by this series yet]. See my previous message about it) -- Eduardo

--- src/qemu/qemu_capabilities.c | 192 +++++++++++++++++++++++++++---------------- 1 file changed, 120 insertions(+), 72 deletions(-) diff --git a/src/qemu/qemu_capabilities.c b/src/qemu/qemu_capabilities.c index 5dc3c9e..9440396 100644 --- a/src/qemu/qemu_capabilities.c +++ b/src/qemu/qemu_capabilities.c @@ -2464,6 +2464,116 @@ cleanup: return ret; } +static virCommandPtr +virQEMUCapsInitQMPCommandNew(const char *binary, + const char *monitor, + const char *pidfile, + uid_t runUid, + gid_t runGid) +{ + virCommandPtr cmd; + + /* + * We explicitly need to use -daemonize here, rather than + * virCommandDaemonize, because we need to synchronize + * with QEMU creating its monitor socket API. Using + * daemonize guarantees control won't return to libvirt + * until the socket is present. + */ + cmd = virCommandNewArgList(binary, + "-S", + "-no-user-config", + "-nodefaults", + "-nographic", + "-M", "none", + "-qmp", monitor, + "-pidfile", pidfile, + "-daemonize", + NULL); + virCommandAddEnvPassCommon(cmd); + virCommandClearCaps(cmd); + virCommandSetGID(cmd, runGid); + virCommandSetUID(cmd, runUid); + return cmd; +} + +static int +virQEMUCapsInitQMPCommandRun(virCommandPtr cmd, + const char *binary, + const char *pidfile, + virDomainChrSourceDefPtr config, + qemuMonitorPtr *mon, + pid_t *pid) +{ + int status = 0; + virDomainObj vm; + int ret = -1; + + if (virCommandRun(cmd, &status) < 0) { + ret = -2; + goto cleanup; + } + + if (status != 0) { + VIR_DEBUG("QEMU %s exited with status %d", binary, status); + goto cleanup; + } + + if (virPidFileReadPath(pidfile, pid) < 0) { + VIR_DEBUG("Failed to read pidfile %s", pidfile); + goto cleanup; + } + + memset(&vm, 0, sizeof(vm)); + vm.pid = *pid; + + if (!(*mon = qemuMonitorOpen(&vm, config, true, &callbacks))) + goto cleanup; + + virObjectLock(*mon); + + if (qemuMonitorSetCapabilities(*mon) < 0) { + virErrorPtr err = virGetLastError(); + VIR_DEBUG("Failed to set monitor capabilities %s", + err ? err->message : "<unknown problem>"); + goto cleanup; + } + + ret = 0; +cleanup: + return ret; +} + +static void +virQEMUCapsInitQMPCommandAbort(virCommandPtr *cmd, + qemuMonitorPtr *mon, + pid_t *pid, + const char *pidfile) +{ + if (*mon) + virObjectUnlock(*mon); + qemuMonitorClose(*mon); + *mon = NULL; + + virCommandAbort(*cmd); + virCommandFree(*cmd); + *cmd = NULL; + + if (*pid != 0) { + char ebuf[1024]; + + VIR_DEBUG("Killing QMP caps process %lld", (long long) *pid); + if (virProcessKill(*pid, SIGKILL) < 0 && errno != ESRCH) + VIR_ERROR(_("Failed to kill process %lld: %s"), + (long long) *pid, + virStrerror(errno, ebuf, sizeof(ebuf))); + *pid = 0; + } + + if (pidfile) + unlink(pidfile); +} + static int virQEMUCapsInitQMP(virQEMUCapsPtr qemuCaps, const char *libDir, @@ -2475,13 +2585,11 @@ virQEMUCapsInitQMP(virQEMUCapsPtr qemuCaps, qemuMonitorPtr mon = NULL; int major, minor, micro; char *package = NULL; - int status = 0; virDomainChrSourceDef config; char *monarg = NULL; char *monpath = NULL; char *pidfile = NULL; pid_t pid = 0; - virDomainObj vm; /* the ".sock" sufix is important to avoid a possible clash with a qemu * domain called "capabilities" @@ -2507,58 +2615,15 @@ virQEMUCapsInitQMP(virQEMUCapsPtr qemuCaps, VIR_DEBUG("Try to get caps via QMP qemuCaps=%p", qemuCaps); - /* - * We explicitly need to use -daemonize here, rather than - * virCommandDaemonize, because we need to synchronize - * with QEMU creating its monitor socket API. Using - * daemonize guarantees control won't return to libvirt - * until the socket is present. - */ - cmd = virCommandNewArgList(qemuCaps->binary, - "-S", - "-no-user-config", - "-nodefaults", - "-nographic", - "-M", "none", - "-qmp", monarg, - "-pidfile", pidfile, - "-daemonize", - NULL); - virCommandAddEnvPassCommon(cmd); - virCommandClearCaps(cmd); - virCommandSetGID(cmd, runGid); - virCommandSetUID(cmd, runUid); - - if (virCommandRun(cmd, &status) < 0) - goto cleanup; + cmd = virQEMUCapsInitQMPCommandNew(qemuCaps->binary, monarg, pidfile, + runUid, runGid); - if (status != 0) { - ret = 0; - VIR_DEBUG("QEMU %s exited with status %d", qemuCaps->binary, status); - goto cleanup; - } - - if (virPidFileReadPath(pidfile, &pid) < 0) { - VIR_DEBUG("Failed to read pidfile %s", pidfile); - ret = 0; - goto cleanup; - } - - memset(&vm, 0, sizeof(vm)); - vm.pid = pid; - - if (!(mon = qemuMonitorOpen(&vm, &config, true, &callbacks))) { - ret = 0; - goto cleanup; - } - - virObjectLock(mon); - - if (qemuMonitorSetCapabilities(mon) < 0) { - virErrorPtr err = virGetLastError(); - VIR_DEBUG("Failed to set monitor capabilities %s", - err ? err->message : "<unknown problem>"); - ret = 0; + if ((ret = virQEMUCapsInitQMPCommandRun(cmd, qemuCaps->binary, pidfile, + &config, &mon, &pid)) < 0) { + if (ret == -2) + ret = -1; + else + ret = 0; goto cleanup; } @@ -2617,28 +2682,11 @@ virQEMUCapsInitQMP(virQEMUCapsPtr qemuCaps, ret = 0; cleanup: - if (mon) - virObjectUnlock(mon); - qemuMonitorClose(mon); - virCommandAbort(cmd); - virCommandFree(cmd); + virQEMUCapsInitQMPCommandAbort(&cmd, &mon, &pid, pidfile); VIR_FREE(monarg); VIR_FREE(monpath); VIR_FREE(package); - - if (pid != 0) { - char ebuf[1024]; - - VIR_DEBUG("Killing QMP caps process %lld", (long long) pid); - if (virProcessKill(pid, SIGKILL) < 0 && errno != ESRCH) - VIR_ERROR(_("Failed to kill process %lld: %s"), - (long long) pid, - virStrerror(errno, ebuf, sizeof(ebuf))); - } - if (pidfile) { - unlink(pidfile); - VIR_FREE(pidfile); - } + VIR_FREE(pidfile); return ret; } -- 1.8.3.2

On Tue, Jul 23, 2013 at 06:11:34PM +0200, Jiri Denemark wrote: Could really do with a commit message describing what is being changed here & what the need is
--- src/qemu/qemu_capabilities.c | 192 +++++++++++++++++++++++++++---------------- 1 file changed, 120 insertions(+), 72 deletions(-)
Daniel -- |: http://berrange.com -o- http://www.flickr.com/photos/dberrange/ :| |: http://libvirt.org -o- http://virt-manager.org :| |: http://autobuild.org -o- http://search.cpan.org/~danberr/ :| |: http://entangle-photo.org -o- http://live.gnome.org/gtk-vnc :|

Since QEMU and kvm may filter some host CPU features or add efficiently emulated features, asking QEMU binary for host CPU data provides better results when we later use the data for building guest CPUs. --- src/qemu/qemu_capabilities.c | 44 +++++++++++++++++++++++++++++++++++++++++++- src/qemu/qemu_capabilities.h | 2 ++ 2 files changed, 45 insertions(+), 1 deletion(-) diff --git a/src/qemu/qemu_capabilities.c b/src/qemu/qemu_capabilities.c index 9440396..d46a059 100644 --- a/src/qemu/qemu_capabilities.c +++ b/src/qemu/qemu_capabilities.c @@ -253,6 +253,7 @@ struct _virQEMUCaps { size_t ncpuDefinitions; char **cpuDefinitions; + virCPUDefPtr hostCPU; size_t nmachineTypes; char **machineTypes; @@ -1757,6 +1758,9 @@ virQEMUCapsPtr virQEMUCapsNewCopy(virQEMUCapsPtr qemuCaps) goto error; } + if (!(ret->hostCPU = virCPUDefCopy(qemuCaps->hostCPU))) + goto error; + if (VIR_ALLOC_N(ret->machineTypes, qemuCaps->nmachineTypes) < 0) goto error; if (VIR_ALLOC_N(ret->machineAliases, qemuCaps->nmachineTypes) < 0) @@ -1796,6 +1800,7 @@ void virQEMUCapsDispose(void *obj) VIR_FREE(qemuCaps->cpuDefinitions[i]); } VIR_FREE(qemuCaps->cpuDefinitions); + virCPUDefFree(qemuCaps->hostCPU); virBitmapFree(qemuCaps->flags); @@ -2485,7 +2490,6 @@ virQEMUCapsInitQMPCommandNew(const char *binary, "-no-user-config", "-nodefaults", "-nographic", - "-M", "none", "-qmp", monitor, "-pidfile", pidfile, "-daemonize", @@ -2617,6 +2621,7 @@ virQEMUCapsInitQMP(virQEMUCapsPtr qemuCaps, cmd = virQEMUCapsInitQMPCommandNew(qemuCaps->binary, monarg, pidfile, runUid, runGid); + virCommandAddArgList(cmd, "-M", "none", NULL); if ((ret = virQEMUCapsInitQMPCommandRun(cmd, qemuCaps->binary, pidfile, &config, &mon, &pid)) < 0) { @@ -2679,6 +2684,37 @@ virQEMUCapsInitQMP(virQEMUCapsPtr qemuCaps, if (virQEMUCapsProbeQMPCommandLine(qemuCaps, mon) < 0) goto cleanup; + if ((qemuCaps->arch == VIR_ARCH_I686 || + qemuCaps->arch == VIR_ARCH_X86_64) && + (virQEMUCapsGet(qemuCaps, QEMU_CAPS_KVM) || + virQEMUCapsGet(qemuCaps, QEMU_CAPS_ENABLE_KVM)) && + virQEMUCapsGet(qemuCaps, QEMU_CAPS_CPU_HOST) && + qemuCaps->nmachineTypes) { + virQEMUCapsInitQMPCommandAbort(&cmd, &mon, &pid, pidfile); + + VIR_DEBUG("Checking host CPU data provided by %s", qemuCaps->binary); + cmd = virQEMUCapsInitQMPCommandNew(qemuCaps->binary, monarg, pidfile, + runUid, runGid); + virCommandAddArgList(cmd, "-cpu", "host", NULL); + /* -cpu host gives the same CPU for all machine types so we just + * use the first one when probing + */ + virCommandAddArg(cmd, "-machine"); + virCommandAddArgFormat(cmd, "%s,accel=kvm", + qemuCaps->machineTypes[0]); + + if (virQEMUCapsInitQMPCommandRun(cmd, qemuCaps->binary, pidfile, + &config, &mon, &pid) < 0) + goto cleanup; + + qemuCaps->hostCPU = qemuMonitorGetCPU(mon, qemuCaps->arch); + if (qemuCaps->hostCPU) { + char *cpu = virCPUDefFormat(qemuCaps->hostCPU, 0); + VIR_DEBUG("Host CPU reported by %s: %s", qemuCaps->binary, cpu); + VIR_FREE(cpu); + } + } + ret = 0; cleanup: @@ -2858,3 +2894,9 @@ virQEMUCapsUsedQMP(virQEMUCapsPtr qemuCaps) { return qemuCaps->usedQMP; } + +virCPUDefPtr +virQEMUCapsGetHostCPU(virQEMUCapsPtr qemuCaps) +{ + return qemuCaps->hostCPU; +} diff --git a/src/qemu/qemu_capabilities.h b/src/qemu/qemu_capabilities.h index f5f685d..e7774a3 100644 --- a/src/qemu/qemu_capabilities.h +++ b/src/qemu/qemu_capabilities.h @@ -272,4 +272,6 @@ int virQEMUCapsParseDeviceStr(virQEMUCapsPtr qemuCaps, const char *str); VIR_ENUM_DECL(virQEMUCaps); bool virQEMUCapsUsedQMP(virQEMUCapsPtr qemuCaps); +virCPUDefPtr virQEMUCapsGetHostCPU(virQEMUCapsPtr qemuCaps); + #endif /* __QEMU_CAPABILITIES_H__*/ -- 1.8.3.2

On Tue, Jul 23, 2013 at 06:11:35PM +0200, Jiri Denemark wrote:
Since QEMU and kvm may filter some host CPU features or add efficiently emulated features, asking QEMU binary for host CPU data provides better results when we later use the data for building guest CPUs. --- src/qemu/qemu_capabilities.c | 44 +++++++++++++++++++++++++++++++++++++++++++- src/qemu/qemu_capabilities.h | 2 ++ 2 files changed, 45 insertions(+), 1 deletion(-)
diff --git a/src/qemu/qemu_capabilities.c b/src/qemu/qemu_capabilities.c index 9440396..d46a059 100644 --- a/src/qemu/qemu_capabilities.c +++ b/src/qemu/qemu_capabilities.c @@ -253,6 +253,7 @@ struct _virQEMUCaps {
size_t ncpuDefinitions; char **cpuDefinitions; + virCPUDefPtr hostCPU;
size_t nmachineTypes; char **machineTypes; @@ -1757,6 +1758,9 @@ virQEMUCapsPtr virQEMUCapsNewCopy(virQEMUCapsPtr qemuCaps) goto error; }
+ if (!(ret->hostCPU = virCPUDefCopy(qemuCaps->hostCPU))) + goto error; + if (VIR_ALLOC_N(ret->machineTypes, qemuCaps->nmachineTypes) < 0) goto error; if (VIR_ALLOC_N(ret->machineAliases, qemuCaps->nmachineTypes) < 0) @@ -1796,6 +1800,7 @@ void virQEMUCapsDispose(void *obj) VIR_FREE(qemuCaps->cpuDefinitions[i]); } VIR_FREE(qemuCaps->cpuDefinitions); + virCPUDefFree(qemuCaps->hostCPU);
virBitmapFree(qemuCaps->flags);
@@ -2485,7 +2490,6 @@ virQEMUCapsInitQMPCommandNew(const char *binary, "-no-user-config", "-nodefaults", "-nographic", - "-M", "none", "-qmp", monitor, "-pidfile", pidfile, "-daemonize", @@ -2617,6 +2621,7 @@ virQEMUCapsInitQMP(virQEMUCapsPtr qemuCaps,
cmd = virQEMUCapsInitQMPCommandNew(qemuCaps->binary, monarg, pidfile, runUid, runGid); + virCommandAddArgList(cmd, "-M", "none", NULL);
if ((ret = virQEMUCapsInitQMPCommandRun(cmd, qemuCaps->binary, pidfile, &config, &mon, &pid)) < 0) { @@ -2679,6 +2684,37 @@ virQEMUCapsInitQMP(virQEMUCapsPtr qemuCaps, if (virQEMUCapsProbeQMPCommandLine(qemuCaps, mon) < 0) goto cleanup;
+ if ((qemuCaps->arch == VIR_ARCH_I686 || + qemuCaps->arch == VIR_ARCH_X86_64) && + (virQEMUCapsGet(qemuCaps, QEMU_CAPS_KVM) || + virQEMUCapsGet(qemuCaps, QEMU_CAPS_ENABLE_KVM)) && + virQEMUCapsGet(qemuCaps, QEMU_CAPS_CPU_HOST) && + qemuCaps->nmachineTypes) { + virQEMUCapsInitQMPCommandAbort(&cmd, &mon, &pid, pidfile); + + VIR_DEBUG("Checking host CPU data provided by %s", qemuCaps->binary); + cmd = virQEMUCapsInitQMPCommandNew(qemuCaps->binary, monarg, pidfile, + runUid, runGid); + virCommandAddArgList(cmd, "-cpu", "host", NULL); + /* -cpu host gives the same CPU for all machine types so we just + * use the first one when probing + */ + virCommandAddArg(cmd, "-machine"); + virCommandAddArgFormat(cmd, "%s,accel=kvm", + qemuCaps->machineTypes[0]); + + if (virQEMUCapsInitQMPCommandRun(cmd, qemuCaps->binary, pidfile, + &config, &mon, &pid) < 0) + goto cleanup; + + qemuCaps->hostCPU = qemuMonitorGetCPU(mon, qemuCaps->arch); + if (qemuCaps->hostCPU) { + char *cpu = virCPUDefFormat(qemuCaps->hostCPU, 0); + VIR_DEBUG("Host CPU reported by %s: %s", qemuCaps->binary, cpu); + VIR_FREE(cpu); + } + }
This code is causing us to invoke the QEMU binary multiple times, which is something we worked really hard to get away from. I really, really don't like this as an approach. QEMU needs to be able to give us the data we need here without multiple invocations. eg, by allowing the monitor command to specify 'kvm' vs 'qemu' when asking for data, so you can interrogate it without having to re-launch it with different accel=XXX args. Daniel -- |: http://berrange.com -o- http://www.flickr.com/photos/dberrange/ :| |: http://libvirt.org -o- http://virt-manager.org :| |: http://autobuild.org -o- http://search.cpan.org/~danberr/ :| |: http://entangle-photo.org -o- http://live.gnome.org/gtk-vnc :|

[adding qemu] On 07/23/2013 10:19 AM, Daniel P. Berrange wrote:
On Tue, Jul 23, 2013 at 06:11:35PM +0200, Jiri Denemark wrote:
Since QEMU and kvm may filter some host CPU features or add efficiently emulated features, asking QEMU binary for host CPU data provides better results when we later use the data for building guest CPUs. ---
+ virCommandAddArg(cmd, "-machine"); + virCommandAddArgFormat(cmd, "%s,accel=kvm", + qemuCaps->machineTypes[0]); +
This code is causing us to invoke the QEMU binary multiple times, which is something we worked really hard to get away from. I really, really don't like this as an approach. QEMU needs to be able to give us the data we need here without multiple invocations.
eg, by allowing the monitor command to specify 'kvm' vs 'qemu' when asking for data, so you can interrogate it without having to re-launch it with different accel=XXX args.
We don't know if -machine accel=kvm is a valid option until after issuing some QMP commands, but now we are forced to reinvoke a new qemu with -machine accel=kvm enabled in order to get the query to give us accurate answers. I agree that this is less than desirable; hopefully the qemu folks can help us figure out a solution, now that we are bringing attention to the issue. -- Eric Blake eblake redhat com +1-919-301-3266 Libvirt virtualization library http://libvirt.org

On Tue, Jul 23, 2013 at 17:19:03 +0100, Daniel Berrange wrote:
On Tue, Jul 23, 2013 at 06:11:35PM +0200, Jiri Denemark wrote:
Since QEMU and kvm may filter some host CPU features or add efficiently emulated features, asking QEMU binary for host CPU data provides better results when we later use the data for building guest CPUs. --- src/qemu/qemu_capabilities.c | 44 +++++++++++++++++++++++++++++++++++++++++++- src/qemu/qemu_capabilities.h | 2 ++ 2 files changed, 45 insertions(+), 1 deletion(-)
diff --git a/src/qemu/qemu_capabilities.c b/src/qemu/qemu_capabilities.c index 9440396..d46a059 100644 --- a/src/qemu/qemu_capabilities.c +++ b/src/qemu/qemu_capabilities.c @@ -253,6 +253,7 @@ struct _virQEMUCaps {
size_t ncpuDefinitions; char **cpuDefinitions; + virCPUDefPtr hostCPU;
size_t nmachineTypes; char **machineTypes; @@ -1757,6 +1758,9 @@ virQEMUCapsPtr virQEMUCapsNewCopy(virQEMUCapsPtr qemuCaps) goto error; }
+ if (!(ret->hostCPU = virCPUDefCopy(qemuCaps->hostCPU))) + goto error; + if (VIR_ALLOC_N(ret->machineTypes, qemuCaps->nmachineTypes) < 0) goto error; if (VIR_ALLOC_N(ret->machineAliases, qemuCaps->nmachineTypes) < 0) @@ -1796,6 +1800,7 @@ void virQEMUCapsDispose(void *obj) VIR_FREE(qemuCaps->cpuDefinitions[i]); } VIR_FREE(qemuCaps->cpuDefinitions); + virCPUDefFree(qemuCaps->hostCPU);
virBitmapFree(qemuCaps->flags);
@@ -2485,7 +2490,6 @@ virQEMUCapsInitQMPCommandNew(const char *binary, "-no-user-config", "-nodefaults", "-nographic", - "-M", "none", "-qmp", monitor, "-pidfile", pidfile, "-daemonize", @@ -2617,6 +2621,7 @@ virQEMUCapsInitQMP(virQEMUCapsPtr qemuCaps,
cmd = virQEMUCapsInitQMPCommandNew(qemuCaps->binary, monarg, pidfile, runUid, runGid); + virCommandAddArgList(cmd, "-M", "none", NULL);
if ((ret = virQEMUCapsInitQMPCommandRun(cmd, qemuCaps->binary, pidfile, &config, &mon, &pid)) < 0) { @@ -2679,6 +2684,37 @@ virQEMUCapsInitQMP(virQEMUCapsPtr qemuCaps, if (virQEMUCapsProbeQMPCommandLine(qemuCaps, mon) < 0) goto cleanup;
+ if ((qemuCaps->arch == VIR_ARCH_I686 || + qemuCaps->arch == VIR_ARCH_X86_64) && + (virQEMUCapsGet(qemuCaps, QEMU_CAPS_KVM) || + virQEMUCapsGet(qemuCaps, QEMU_CAPS_ENABLE_KVM)) && + virQEMUCapsGet(qemuCaps, QEMU_CAPS_CPU_HOST) && + qemuCaps->nmachineTypes) { + virQEMUCapsInitQMPCommandAbort(&cmd, &mon, &pid, pidfile); + + VIR_DEBUG("Checking host CPU data provided by %s", qemuCaps->binary); + cmd = virQEMUCapsInitQMPCommandNew(qemuCaps->binary, monarg, pidfile, + runUid, runGid); + virCommandAddArgList(cmd, "-cpu", "host", NULL); + /* -cpu host gives the same CPU for all machine types so we just + * use the first one when probing + */ + virCommandAddArg(cmd, "-machine"); + virCommandAddArgFormat(cmd, "%s,accel=kvm", + qemuCaps->machineTypes[0]); + + if (virQEMUCapsInitQMPCommandRun(cmd, qemuCaps->binary, pidfile, + &config, &mon, &pid) < 0) + goto cleanup; + + qemuCaps->hostCPU = qemuMonitorGetCPU(mon, qemuCaps->arch); + if (qemuCaps->hostCPU) { + char *cpu = virCPUDefFormat(qemuCaps->hostCPU, 0); + VIR_DEBUG("Host CPU reported by %s: %s", qemuCaps->binary, cpu); + VIR_FREE(cpu); + } + }
This code is causing us to invoke the QEMU binary multiple times, which is something we worked really hard to get away from. I really, really don't like this as an approach. QEMU needs to be able to give us the data we need here without multiple invocations.
eg, by allowing the monitor command to specify 'kvm' vs 'qemu' when asking for data, so you can interrogate it without having to re-launch it with different accel=XXX args.
Yeah, it's not an ideal solution but it's all we can get now. For giving us host CPU specification QEMU needs to initialize a complete machine, that is, it needs to be started with machine type != none, and it needs to be started with -cpu host, which cannot be used in tcg, only kvm supports "host" CPU. So the reasons for us invoking QEMU twice are very deep inside QEMU architecture. Moreover, we only run QEMU binary twice for i686 and x86_64 archs and when the binary supports kvm, although this probably covers most real-life cases :/ In any case, it's all behind our capabilities cache so this should only happen once in libvirt life-time. Jirka

On Tue, Jul 23, 2013 at 05:19:03PM +0100, Daniel P. Berrange wrote:
On Tue, Jul 23, 2013 at 06:11:35PM +0200, Jiri Denemark wrote:
Since QEMU and kvm may filter some host CPU features or add efficiently emulated features, asking QEMU binary for host CPU data provides better results when we later use the data for building guest CPUs. --- src/qemu/qemu_capabilities.c | 44 +++++++++++++++++++++++++++++++++++++++++++- src/qemu/qemu_capabilities.h | 2 ++ 2 files changed, 45 insertions(+), 1 deletion(-)
diff --git a/src/qemu/qemu_capabilities.c b/src/qemu/qemu_capabilities.c index 9440396..d46a059 100644 --- a/src/qemu/qemu_capabilities.c +++ b/src/qemu/qemu_capabilities.c @@ -253,6 +253,7 @@ struct _virQEMUCaps {
size_t ncpuDefinitions; char **cpuDefinitions; + virCPUDefPtr hostCPU;
size_t nmachineTypes; char **machineTypes; @@ -1757,6 +1758,9 @@ virQEMUCapsPtr virQEMUCapsNewCopy(virQEMUCapsPtr qemuCaps) goto error; }
+ if (!(ret->hostCPU = virCPUDefCopy(qemuCaps->hostCPU))) + goto error; + if (VIR_ALLOC_N(ret->machineTypes, qemuCaps->nmachineTypes) < 0) goto error; if (VIR_ALLOC_N(ret->machineAliases, qemuCaps->nmachineTypes) < 0) @@ -1796,6 +1800,7 @@ void virQEMUCapsDispose(void *obj) VIR_FREE(qemuCaps->cpuDefinitions[i]); } VIR_FREE(qemuCaps->cpuDefinitions); + virCPUDefFree(qemuCaps->hostCPU);
virBitmapFree(qemuCaps->flags);
@@ -2485,7 +2490,6 @@ virQEMUCapsInitQMPCommandNew(const char *binary, "-no-user-config", "-nodefaults", "-nographic", - "-M", "none", "-qmp", monitor, "-pidfile", pidfile, "-daemonize", @@ -2617,6 +2621,7 @@ virQEMUCapsInitQMP(virQEMUCapsPtr qemuCaps,
cmd = virQEMUCapsInitQMPCommandNew(qemuCaps->binary, monarg, pidfile, runUid, runGid); + virCommandAddArgList(cmd, "-M", "none", NULL);
if ((ret = virQEMUCapsInitQMPCommandRun(cmd, qemuCaps->binary, pidfile, &config, &mon, &pid)) < 0) { @@ -2679,6 +2684,37 @@ virQEMUCapsInitQMP(virQEMUCapsPtr qemuCaps, if (virQEMUCapsProbeQMPCommandLine(qemuCaps, mon) < 0) goto cleanup;
+ if ((qemuCaps->arch == VIR_ARCH_I686 || + qemuCaps->arch == VIR_ARCH_X86_64) && + (virQEMUCapsGet(qemuCaps, QEMU_CAPS_KVM) || + virQEMUCapsGet(qemuCaps, QEMU_CAPS_ENABLE_KVM)) && + virQEMUCapsGet(qemuCaps, QEMU_CAPS_CPU_HOST) && + qemuCaps->nmachineTypes) { + virQEMUCapsInitQMPCommandAbort(&cmd, &mon, &pid, pidfile); + + VIR_DEBUG("Checking host CPU data provided by %s", qemuCaps->binary); + cmd = virQEMUCapsInitQMPCommandNew(qemuCaps->binary, monarg, pidfile, + runUid, runGid); + virCommandAddArgList(cmd, "-cpu", "host", NULL); + /* -cpu host gives the same CPU for all machine types so we just + * use the first one when probing + */ + virCommandAddArg(cmd, "-machine"); + virCommandAddArgFormat(cmd, "%s,accel=kvm", + qemuCaps->machineTypes[0]); + + if (virQEMUCapsInitQMPCommandRun(cmd, qemuCaps->binary, pidfile, + &config, &mon, &pid) < 0) + goto cleanup; + + qemuCaps->hostCPU = qemuMonitorGetCPU(mon, qemuCaps->arch); + if (qemuCaps->hostCPU) { + char *cpu = virCPUDefFormat(qemuCaps->hostCPU, 0); + VIR_DEBUG("Host CPU reported by %s: %s", qemuCaps->binary, cpu); + VIR_FREE(cpu); + } + }
This code is causing us to invoke the QEMU binary multiple times, which is something we worked really hard to get away from. I really, really don't like this as an approach. QEMU needs to be able to give us the data we need here without multiple invocations.
eg, by allowing the monitor command to specify 'kvm' vs 'qemu' when asking for data, so you can interrogate it without having to re-launch it with different accel=XXX args.
The specific information libvirt requires here depend on KVM being initialized, and QEMU code/interfaces currently assume that: 1) there's only 1 machine being initialized, and it is initialized very early; 2) there's only one accelerator being initialized, and it is initialized very early. I have no idea how long it will take for QEMU to provide a QMP interface for late/multiple initialization of machines/accelerators. In the meantime, the way libvirt queries for host CPU capabilities without taking QEMU and kernel capabilities into account is a serious bug we need to solve. Maybe if we are lucky we can find a workaround in the meantime that won't require running QEMU multiple times, but I am not sure. Maybe it will be a hack that will be worse than simply running QEMU twice. -- Eduardo

Use the host CPU data probed by the current emulator when updating a guest CPU according to a host CPU or when checking whether they are compatible. --- src/qemu/qemu_command.c | 32 ++++++++++++++++++++++++++------ src/qemu/qemu_domain.c | 21 +++++++++++++++------ 2 files changed, 41 insertions(+), 12 deletions(-) diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c index d0aed7c..f2124d8 100644 --- a/src/qemu/qemu_command.c +++ b/src/qemu/qemu_command.c @@ -5683,13 +5683,19 @@ qemuBuildCpuArgStr(const virQEMUDriverPtr driver, virBuffer buf = VIR_BUFFER_INITIALIZER; size_t i; virCapsPtr caps = NULL; + bool filteredHost = false; *hasHwVirt = false; if (!(caps = virQEMUDriverGetCapabilities(driver, false))) goto cleanup; - host = caps->host.cpu; + if (def->virtType == VIR_DOMAIN_VIRT_KVM) + host = virQEMUCapsGetHostCPU(qemuCaps); + if (host) + filteredHost = true; + else + host = caps->host.cpu; if (def->os.arch == VIR_ARCH_I686) default_model = "qemu32"; @@ -5722,12 +5728,26 @@ qemuBuildCpuArgStr(const virQEMUDriverPtr driver, switch (cmp) { case VIR_CPU_COMPARE_INCOMPATIBLE: if (compare_msg) { - virReportError(VIR_ERR_CONFIG_UNSUPPORTED, - _("guest and host CPU are not compatible: %s"), - compare_msg); + if (filteredHost) { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, + _("requested guest CPU cannot be provided " + "by QEMU binary on this host: %s"), + compare_msg); + } else { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, + _("guest and host CPU are not " + "compatible: %s"), compare_msg); + } } else { - virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", - _("guest CPU is not compatible with host CPU")); + if (filteredHost) { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", + _("requested guest CPU cannot be provided " + "by QEMU binary on this host")); + } else { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", + _("guest CPU is not compatible " + "with host CPU")); + } } /* fall through */ case VIR_CPU_COMPARE_ERROR: diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c index da3b768..83197e0 100644 --- a/src/qemu/qemu_domain.c +++ b/src/qemu/qemu_domain.c @@ -1347,6 +1347,8 @@ qemuDomainDefFormatBuf(virQEMUDriverPtr driver, virDomainControllerDefPtr *controllers = NULL; int ncontrollers = 0; virCapsPtr caps = NULL; + virQEMUCapsPtr qemuCaps = NULL; + virCPUDefPtr hostCPU = NULL; if (!(caps = virQEMUDriverGetCapabilities(driver, false))) goto cleanup; @@ -1355,15 +1357,21 @@ qemuDomainDefFormatBuf(virQEMUDriverPtr driver, if ((flags & VIR_DOMAIN_XML_UPDATE_CPU) && def_cpu && (def_cpu->mode != VIR_CPU_MODE_CUSTOM || def_cpu->model)) { - if (!caps->host.cpu || - !caps->host.cpu->model) { - virReportError(VIR_ERR_OPERATION_FAILED, - "%s", _("cannot get host CPU capabilities")); - goto cleanup; + if ((qemuCaps = virQEMUCapsCacheLookup(driver->qemuCapsCache, + def->emulator))) + hostCPU = virQEMUCapsGetHostCPU(qemuCaps); + + if (!hostCPU) { + if (!caps->host.cpu || !caps->host.cpu->model) { + virReportError(VIR_ERR_OPERATION_FAILED, "%s", + _("cannot get host CPU capabilities")); + goto cleanup; + } + hostCPU = caps->host.cpu; } if (!(cpu = virCPUDefCopy(def_cpu)) || - cpuUpdate(cpu, caps->host.cpu) < 0) + cpuUpdate(cpu, hostCPU) < 0) goto cleanup; def->cpu = cpu; } @@ -1445,6 +1453,7 @@ cleanup: def->ncontrollers = ncontrollers; } virObjectUnref(caps); + virObjectUnref(qemuCaps); return ret; } -- 1.8.3.2
participants (5)
-
Andreas Färber
-
Daniel P. Berrange
-
Eduardo Habkost
-
Eric Blake
-
Jiri Denemark