[libvirt] [PATCH v3] qemu-migration: Disallow migration of read only disk
by Corey S. McQuay
Currently Libvirt allows attempts to migrate read only disks. Qemu cannot handle this as read only
disks cannot be written to on the destination system. The end result is a cryptic error message
and a failed migration.
This patch causes migration to fail earlier and provides a meaningful error message stating that
migrating read only disks is not supported.
Signed-off-by: Corey S. McQuay <csmcquay(a)linux.vnet.ibm.com>
Reviewed-by: Jason J. Herne <jjherne(a)linux.vnet.ibm.com>
Reviewed-by: Boris Fiuczynski <fiuczy(a)linux.vnet.ibm.com>
---
src/qemu/qemu_migration.c | 6 ++++++
1 file changed, 6 insertions(+)
diff --git a/src/qemu/qemu_migration.c b/src/qemu/qemu_migration.c
index e451ef6..c8fb7ec 100644
--- a/src/qemu/qemu_migration.c
+++ b/src/qemu/qemu_migration.c
@@ -1764,6 +1764,12 @@ qemuMigrationStartNBDServer(virQEMUDriverPtr driver,
/* check whether disk should be migrated */
if (!qemuMigrateDisk(disk, nmigrate_disks, migrate_disks))
continue;
+
+ if (disk->src->readonly) {
+ virReportError(VIR_ERR_OPERATION_UNSUPPORTED,
+ _("Cannot migrate read-only disk %s"), disk->dst);
+ goto cleanup;
+ }
VIR_FREE(diskAlias);
if (!(diskAlias = qemuAliasFromDisk(disk)))
--
2.7.4
8 years, 1 month
[libvirt] [PATCH 0/4] systemd-related fixes and improvements
by Andrea Bolognani
Make libvirt on systemd nicer for the user, by getting rid of
some confusing behavior, and overall more solid.
More details in each specific patch.
Andrea Bolognani (4):
virtlogd.socket: Tie lifecycle to libvirtd.service
libvirt-guests.service: Improve description
libvirt-guests.service: Split After= relationship
libvirt-guests.service: Add Requires=libvirtd.service
daemon/libvirtd.service.in | 1 +
src/logging/virtlogd.service.in | 2 ++
src/logging/virtlogd.socket.in | 2 ++
tools/libvirt-guests.service.in | 7 +++++--
4 files changed, 10 insertions(+), 2 deletions(-)
--
2.7.4
8 years, 1 month
[libvirt] [PATCH] libvirt-storage.c:Lines too long, use 80 character columns.
by Nitesh Konkar
Signed-off-by: Nitesh Konkar <nitkon12(a)linux.vnet.ibm.com>
---
src/libvirt-storage.c | 24 ++++++++++++++++--------
1 file changed, 16 insertions(+), 8 deletions(-)
diff --git a/src/libvirt-storage.c b/src/libvirt-storage.c
index 48996ba..c4f2a03 100644
--- a/src/libvirt-storage.c
+++ b/src/libvirt-storage.c
@@ -233,7 +233,8 @@ virConnectNumOfDefinedStoragePools(virConnectPtr conn)
virCheckConnectReturn(conn, -1);
- if (conn->storageDriver && conn->storageDriver->connectNumOfDefinedStoragePools) {
+ if (conn->storageDriver &&
+ conn->storageDriver->connectNumOfDefinedStoragePools) {
int ret;
ret = conn->storageDriver->connectNumOfDefinedStoragePools(conn);
if (ret < 0)
@@ -280,7 +281,8 @@ virConnectListDefinedStoragePools(virConnectPtr conn,
virCheckNonNullArgGoto(names, error);
virCheckNonNegativeArgGoto(maxnames, error);
- if (conn->storageDriver && conn->storageDriver->connectListDefinedStoragePools) {
+ if (conn->storageDriver &&
+ conn->storageDriver->connectListDefinedStoragePools) {
int ret;
ret = conn->storageDriver->connectListDefinedStoragePools(conn, names, maxnames);
if (ret < 0)
@@ -332,7 +334,8 @@ virConnectFindStoragePoolSources(virConnectPtr conn,
virCheckNonNullArgGoto(type, error);
virCheckReadOnlyGoto(conn->flags, error);
- if (conn->storageDriver && conn->storageDriver->connectFindStoragePoolSources) {
+ if (conn->storageDriver &&
+ conn->storageDriver->connectFindStoragePoolSources) {
char *ret;
ret = conn->storageDriver->connectFindStoragePoolSources(conn, type, srcSpec, flags);
if (!ret)
@@ -485,7 +488,8 @@ virStoragePoolLookupByVolume(virStorageVolPtr vol)
virCheckStorageVolReturn(vol, NULL);
- if (vol->conn->storageDriver && vol->conn->storageDriver->storagePoolLookupByVolume) {
+ if (vol->conn->storageDriver &&
+ vol->conn->storageDriver->storagePoolLookupByVolume) {
virStoragePoolPtr ret;
ret = vol->conn->storageDriver->storagePoolLookupByVolume(vol);
if (!ret)
@@ -1188,7 +1192,8 @@ virStoragePoolNumOfVolumes(virStoragePoolPtr pool)
virCheckStoragePoolReturn(pool, -1);
- if (pool->conn->storageDriver && pool->conn->storageDriver->storagePoolNumOfVolumes) {
+ if (pool->conn->storageDriver &&
+ pool->conn->storageDriver->storagePoolNumOfVolumes) {
int ret;
ret = pool->conn->storageDriver->storagePoolNumOfVolumes(pool);
if (ret < 0)
@@ -1230,7 +1235,8 @@ virStoragePoolListVolumes(virStoragePoolPtr pool,
virCheckNonNullArgGoto(names, error);
virCheckNonNegativeArgGoto(maxnames, error);
- if (pool->conn->storageDriver && pool->conn->storageDriver->storagePoolListVolumes) {
+ if (pool->conn->storageDriver &&
+ pool->conn->storageDriver->storagePoolListVolumes) {
int ret;
ret = pool->conn->storageDriver->storagePoolListVolumes(pool, names, maxnames);
if (ret < 0)
@@ -1297,7 +1303,8 @@ virStorageVolLookupByName(virStoragePoolPtr pool,
virCheckStoragePoolReturn(pool, NULL);
virCheckNonNullArgGoto(name, error);
- if (pool->conn->storageDriver && pool->conn->storageDriver->storageVolLookupByName) {
+ if (pool->conn->storageDriver &&
+ pool->conn->storageDriver->storageVolLookupByName) {
virStorageVolPtr ret;
ret = pool->conn->storageDriver->storageVolLookupByName(pool, name);
if (!ret)
@@ -1471,7 +1478,8 @@ virStorageVolCreateXML(virStoragePoolPtr pool,
virCheckNonNullArgGoto(xmlDesc, error);
virCheckReadOnlyGoto(pool->conn->flags, error);
- if (pool->conn->storageDriver && pool->conn->storageDriver->storageVolCreateXML) {
+ if (pool->conn->storageDriver &&
+ pool->conn->storageDriver->storageVolCreateXML) {
virStorageVolPtr ret;
ret = pool->conn->storageDriver->storageVolCreateXML(pool, xmlDesc, flags);
if (!ret)
--
2.1.0
8 years, 1 month
[libvirt] dnsmasq option "dhcp-authoritative" in libvirt virtual networks?
by Martin Wilck
Hello,
I am seeing a slightly annoying behavior of libvirt-created networks on
my system. Whenever a VM fails to renew its DHCP lease in time (for
example because my laptop is suspended), the IP address changes,
causing various annoyances. If this happens, the log message
"DHCPNAK(virbrX): ... lease not found" appears in the libvirtd logs.
Looking at the dnsmasq code, it seems that this would be fixed by
running dnsmasq with the "dhcp-authoritative" option, which, according
to the dnsmasq man page, "should be set when dnsmasq is definitely the
only DHCP server on a network" - IMO that's the case for libvirtd-
managed virtual networks. So the question comes to my mind: is there a
good reason why libvirtd doesn't set "dhcp-authoritative" on the
dnsmasq instances it starts?
Regards
Martin
--
Dr. Martin Wilck <mwilck(a)suse.com>, Tel. +49 (0)911 74053 2107
SUSELinux GmbH, GF: Felix Imendörffer, Jane Smithard, Graham Norton
HRB 21284 (AG Nürnberg)
8 years, 1 month
[libvirt] [PATCH 00/11] cleanups and improvements for video device code
by Pavel Hrdina
Pavel Hrdina (11):
tests: fix some QXL capability combinations that doesn't make sense
qemu_capabilities: join capabilities for qxl and qxl-vga devices
qemu_capabilities: mark QEMU_CAPS_VGA_QXL capability as deprecated
qemu_domain: move video validation out of qemu_command
qemu_process: move video validation out of qemu_command
qemu_capabilities: rename QEMU_CAPS_VIRTIO_GPU_VIRGL
qemu_command: separate code for video device via -vga attribute
qemu_command: cleanup qemuBuildVideoCommandLine
qemu_capabilities: check for existence of virtio-vga
qemu_command: properly detect which model to use for video device
qemu_command: add support to use virtio as secondary video device
docs/formatdomain.html.in | 3 +-
src/qemu/qemu_capabilities.c | 23 +-
src/qemu/qemu_capabilities.h | 15 +-
src/qemu/qemu_command.c | 335 +++++++++------------
src/qemu/qemu_domain.c | 70 +++++
src/qemu/qemu_domain.h | 3 +
src/qemu/qemu_domain_address.c | 6 -
src/qemu/qemu_process.c | 54 +++-
.../qemu_2.6.0-gicv2-virt.aarch64.xml | 1 -
.../qemu_2.6.0-gicv3-virt.aarch64.xml | 1 -
tests/domaincapsschemadata/qemu_2.6.0.aarch64.xml | 1 -
tests/domaincapsschemadata/qemu_2.6.0.ppc64le.xml | 1 -
.../qemucapabilitiesdata/caps_1.2.2.x86_64.replies | 70 +----
tests/qemucapabilitiesdata/caps_1.2.2.x86_64.xml | 4 -
.../qemucapabilitiesdata/caps_1.3.1.x86_64.replies | 76 +----
tests/qemucapabilitiesdata/caps_1.3.1.x86_64.xml | 4 -
.../qemucapabilitiesdata/caps_1.4.2.x86_64.replies | 74 +----
tests/qemucapabilitiesdata/caps_1.4.2.x86_64.xml | 4 -
.../qemucapabilitiesdata/caps_1.5.3.x86_64.replies | 74 +----
tests/qemucapabilitiesdata/caps_1.5.3.x86_64.xml | 4 -
.../qemucapabilitiesdata/caps_1.6.0.x86_64.replies | 74 +----
tests/qemucapabilitiesdata/caps_1.6.0.x86_64.xml | 4 -
.../qemucapabilitiesdata/caps_1.7.0.x86_64.replies | 74 +----
tests/qemucapabilitiesdata/caps_1.7.0.x86_64.xml | 4 -
.../qemucapabilitiesdata/caps_2.1.1.x86_64.replies | 74 +----
tests/qemucapabilitiesdata/caps_2.1.1.x86_64.xml | 4 -
.../qemucapabilitiesdata/caps_2.4.0.x86_64.replies | 107 ++-----
tests/qemucapabilitiesdata/caps_2.4.0.x86_64.xml | 6 +-
.../qemucapabilitiesdata/caps_2.5.0.x86_64.replies | 117 +++----
tests/qemucapabilitiesdata/caps_2.5.0.x86_64.xml | 6 +-
.../caps_2.6.0-gicv2.aarch64.replies | 43 ++-
.../caps_2.6.0-gicv2.aarch64.xml | 1 -
.../caps_2.6.0-gicv3.aarch64.replies | 43 ++-
.../caps_2.6.0-gicv3.aarch64.xml | 1 -
.../caps_2.6.0.ppc64le.replies | 43 ++-
tests/qemucapabilitiesdata/caps_2.6.0.ppc64le.xml | 2 +-
.../qemucapabilitiesdata/caps_2.6.0.x86_64.replies | 117 +++----
tests/qemucapabilitiesdata/caps_2.6.0.x86_64.xml | 6 +-
.../qemucapabilitiesdata/caps_2.7.0.x86_64.replies | 122 +++-----
tests/qemucapabilitiesdata/caps_2.7.0.x86_64.xml | 6 +-
tests/qemuhelptest.c | 4 -
.../qemuxml2argv-pcie-root-port.args | 5 +-
.../qemuxml2argv-pcie-switch-downstream-port.args | 5 +-
.../qemuxml2argv-pcie-switch-upstream-port.args | 5 +-
.../qemuxml2argv-pcihole64-q35.args | 5 +-
.../qemuxml2argv-q35-usb2-multi.args | 5 +-
.../qemuxml2argv-q35-usb2-reorder.args | 5 +-
tests/qemuxml2argvdata/qemuxml2argv-q35-usb2.args | 5 +-
tests/qemuxml2argvdata/qemuxml2argv-q35.args | 5 +-
.../qemuxml2argv-video-virtio-gpu-device.args | 2 +-
.../qemuxml2argv-video-virtio-gpu-sec.args | 25 ++
.../qemuxml2argv-video-virtio-gpu-sec.xml | 36 +++
.../qemuxml2argv-video-virtio-gpu-spice-gl.args | 2 +-
.../qemuxml2argv-video-virtio-gpu-virgl.args | 2 +-
.../qemuxml2argv-video-virtio-vga.args | 24 ++
...evice.xml => qemuxml2argv-video-virtio-vga.xml} | 11 +-
tests/qemuxml2argvtest.c | 152 +++++-----
tests/qemuxml2xmltest.c | 18 +-
58 files changed, 778 insertions(+), 1215 deletions(-)
create mode 100644 tests/qemuxml2argvdata/qemuxml2argv-video-virtio-gpu-sec.args
create mode 100644 tests/qemuxml2argvdata/qemuxml2argv-video-virtio-gpu-sec.xml
create mode 100644 tests/qemuxml2argvdata/qemuxml2argv-video-virtio-vga.args
rename tests/qemuxml2argvdata/{qemuxml2argv-video-qxl-sec-nodevice.xml => qemuxml2argv-video-virtio-vga.xml} (77%)
--
2.10.0
8 years, 1 month
[libvirt] [PATCH v2 00/20] Split parsing and defining logic of daemon's logging
by Erik Skultety
v2 of the original series
https://www.redhat.com/archives/libvir-list/2016-May/msg00229.html
since v1:
- as Cole pointed out in 20/38 of the original series, the patches were not
designed in an elegant way and were hard to review, so this version reworks
the whole series:
-> first the existing methods that do combine parsing and defining logic
and which should be dropped are renamed to a more accurate name
-> all the necessary methods to achieve the "split" are introduced
gradually, interconnected with each other
-> finally, all the callers switch to the new logic introduced in the early
patches in a transparent way
-> all the original poorly named methods are completely dropped
- also, the original series introduced a new set of API locks because there was
an issue with 2 concurrent setters: while setter1 was preparing its local
set of outputs to replace the existing global one, setter2 might replace
the global set with its own copy, invalidating all fds in setter1's set. This
happened because the original series *copied* (rather than duplicated) fds, so
a copied fd would be invalidated when setter2 issued a reset.
This series, however, duplicates the fds of the file-based outputs
(those that should remain open). So even if setter2 replaces the original set
with its copy and calls reset, effectively closing all fds, it does not matter
for setter1, since unlink only decrements the number of references to a
specific opened fd.
Erik Skultety (20):
virlog: Rename virLogParse* to virLogParseAndDefine*
virlog: Introduce virLogOutputNew
virlog: Introduce virLogFilterNew
virlog: Introduce virLogFindOutput
virlog: Introduce virLogDefineOutputs
virlog: Introduce virLogDefineFilters
virlog: Introduce virLogNewOutputTo* as a replacement for
virLogAddOutputTo*
virlog: Take a special care of syslog when setting new set of log
outputs
virlog: Introduce virLogParseOutput
virlog: Introduce virLogParseFilter
virlog: Introduce virLogParseOutputs
virlog: Introduce virLogParseFilters
virlog: Introduce virLogSetOutputs
virlog: Introduce virLogSetFilters
daemon: Split output parsing and output defining
daemon: Split filter parsing and filter defining
virlog: Remove functions that aren't used anywhere anymore
virlog: Make some of the methods static
virlog: Store the journald fd within the output object
virlog: Split parsing and setting priority
daemon/libvirtd.c | 8 +-
src/libvirt_private.syms | 10 +-
src/locking/lock_daemon.c | 8 +-
src/logging/log_daemon.c | 8 +-
src/util/virlog.c | 1079 ++++++++++++++++++++++++++-------------------
src/util/virlog.h | 61 +--
tests/eventtest.c | 3 +-
tests/testutils.c | 11 +-
tests/virlogtest.c | 10 +-
9 files changed, 702 insertions(+), 496 deletions(-)
--
2.5.5
8 years, 1 month
[libvirt] [RFC v2] libvirt vGPU QEMU integration
by Kirti Wankhede
Hi libvirt experts,
Thanks for valuable input on v1 version of RFC.
In brief, the VFIO based mediated device framework provides a way to
virtualize devices without SR-IOV, like NVIDIA vGPU, Intel KVMGT
and IBM's channel IO. This framework reuses VFIO APIs for all the
functionalities for mediated devices which are currently being used for
pass through devices. This framework introduces a set of new sysfs files
for device creation and its life cycle management.
Here is the summary of discussion on v1:
1. Discover mediated device:
As part of physical device initialization process, vendor driver will
register their physical devices, which will be used to create virtual
device (mediated device, aka mdev) to the mediated framework.
Vendor driver should specify mdev_supported_types in directory format.
This format is class based, for example, display class directory format
should be as below. We need to define such set for each class of devices
which would be supported by mediated device framework.
--- mdev_destroy
--- mdev_supported_types
|-- 11
| |-- create
| |-- name
| |-- fb_length
| |-- resolution
| |-- heads
| |-- max_instances
| |-- params
| |-- requires_group
|-- 12
| |-- create
| |-- name
| |-- fb_length
| |-- resolution
| |-- heads
| |-- max_instances
| |-- params
| |-- requires_group
|-- 13
|-- create
|-- name
|-- fb_length
|-- resolution
|-- heads
|-- max_instances
|-- params
|-- requires_group
In the above example directory '11' represents a type id of mdev device.
'name', 'fb_length', 'resolution', 'heads', 'max_instances' and
'requires_group' would be Read-Only files that the vendor would provide to
describe that type.
'create':
Write-only file. Mandatory.
Accepts string to create mediated device.
'name':
Read-Only file. Mandatory.
Returns string, the name of that type id.
'fb_length':
Read-only file. Mandatory.
Returns <number>{K,M,G}, size of framebuffer.
'resolution':
Read-Only file. Mandatory.
Returns 'hres x vres' format. Maximum supported resolution.
'heads':
Read-Only file. Mandatory.
Returns integer. Number of maximum heads supported.
'max_instances':
Read-Only file. Mandatory.
Returns integer. The maximum number of mdev devices that can be created
at the moment when this file is read. This count would be updated by
the vendor driver. Before creating an mdev device of this type, check that
max_instances is > 0.
'params'
Write-Only file. Optional.
String input. Libvirt would pass the string given in XML file to
this file and then create mdev device. Set empty string to clear params.
For example, set parameter 'frame_rate_limiter=0' to disable frame rate
limiter for performance benchmarking, then create device of type 11. The
device created would have that parameter set by vendor driver.
'requires_group'
Read-Only file. Optional.
This should be provided by the vendor driver if it needs to
group mdev devices in one domain so that it can use 'first
open' to commit resources of all mdev devices associated with that domain
and 'last close' to free those.
The parent device would look like:
<device>
<name>pci_0000_86_00_0</name>
<capability type='pci'>
<domain>0</domain>
<bus>134</bus>
<slot>0</slot>
<function>0</function>
<capability type='mdev'>
<!-- one type element per sysfs directory -->
<type id='11'>
<!-- one element per sysfs file roughly -->
<name>GRID M60-0B</name>
<attribute name='fb_length'>512M</attribute>
<attribute name='resolution'>2560x1600</attribute>
<attribute name='heads'>2</attribute>
<attribute name='max_instances'>16</attribute>
<attribute name='requires_group'>1</attribute>
</type>
</capability>
<product id='...'>GRID M60</product>
<vendor id='0x10de'>NVIDIA</vendor>
</capability>
</device>
2. Create/destroy mediated device
With above example, vGPU device XML would look like:
<device>
<name>my-vgpu</name>
<parent>pci_0000_86_00_0</parent>
<capability type='mdev'>
<type id='11'/>
<group>1</group>
<params>'frame_rate_limiter=0'</params>
</capability>
</device>
'type id' is mandatory.
'group' is optional. It should be a unique number in the system among
all the groups created for mdev devices. Its usage is:
- not needed if a single vGPU device is being assigned to a domain.
- only needs to be set if multiple vGPUs need to be assigned to a
domain and the vendor driver has a 'requires_group' file in the type id directory.
- if the type id directory includes 'requires_group' and the user tries to
assign multiple vGPUs to a domain without a <group> field in the XML,
it will create a single vGPU.
'params' is an optional field. The user should set this field if extra
parameters need to be set for a particular vGPU device. Libvirt doesn't
need to parse these params. These are meant for the vendor driver.
Libvirt needs to follow this sequence to create a device:
* Read /sys/../0000\:86\:00.0/11/max_instances. If it is greater than 0,
then only proceed else fail.
* Set extra params if 'params' field exist in device XML and 'params'
file exist in type id directory
echo "frame_rate_limiter=0" > /sys/../0000\:86\:00.0/11/params
* Autogenerate UUID
* Create device:
echo "$UUID:<group>" > /sys/../0000\:86\:00.0/11/create
where <group> is optional. Group should be unique number among all
the groups created for mdev devices.
* Clear params, if set earlier:
echo "" > /sys/../0000\:86\:00.0/11/params
* To destroy device:
echo $UUID > /sys/../0000\:86\:00.0/mdev_destroy
3. Start/stop mediated device
No change or requirement for libvirt as this will be handled by open()
and close() callbacks to vendor driver. In case of multiple devices and
'requires_group' set, this will be handled in 'first open()' and 'last
close()' on device in that group.
4. Launch QEMU/VM
Pass the mdev sysfs path to QEMU as vfio-pci device.
For above vGPU device example:
-device vfio-pci,sysfsdev=/sys/bus/mdev/devices/$UUID
5. QEMU/VM Shutdown sequence
No change or requirement for libvirt.
6. VM Reset
No change or requirement for libvirt as this will be handled via VFIO
reset API and QEMU process will keep running as before.
7. Hot-plug
It is same syntax to create a virtual device for hot-plug.
Thanks,
Kirti
8 years, 1 month
[libvirt] [PATCH] qemu_process: add pid of vm in domain log
by Chen Hanxiao
From: Chen Hanxiao <chenhanxiao(a)gmail.com>
Add pid of VM in domain log.
We used to show this info in debug log.
For example:
If a process sends SIGKILL to a qemu process,
we could find something in audit logs.
Then the pid of VM in domain log will be helpful.
Signed-off-by: Chen Hanxiao <chenhanxiao(a)gmail.com>
---
src/qemu/qemu_process.c | 11 +++++++----
1 file changed, 7 insertions(+), 4 deletions(-)
diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c
index 27d04a4..8510a89 100644
--- a/src/qemu/qemu_process.c
+++ b/src/qemu/qemu_process.c
@@ -5392,11 +5392,14 @@ qemuProcessLaunch(virConnectPtr conn,
_("Domain %s didn't show up"), vm->def->name);
rv = -1;
}
- VIR_DEBUG("QEMU vm=%p name=%s running with pid=%llu",
- vm, vm->def->name, (unsigned long long)vm->pid);
+ qemuDomainLogContextWrite(logCtxt,
+ "QEMU vm=%p name=%s running with pid=%llu\n",
+ vm,
+ vm->def->name,
+ (unsigned long long)vm->pid);
} else {
- VIR_DEBUG("QEMU vm=%p name=%s failed to spawn",
- vm, vm->def->name);
+ qemuDomainLogContextWrite(logCtxt, "QEMU vm=%p name=%s failed to spawn",
+ vm, vm->def->name);
}
VIR_DEBUG("Writing early domain status to disk");
--
1.8.3.1
8 years, 1 month
[libvirt] Question about QEMU + hugepages + NUMA memory + migration
by Sam Bobroff
Hi libvirt people,
I've been looking at a (probable) bug and I'm not sure how to progress. The
situation is a bit complicated and involves both QEMU and libvirt (and I think
it may have been looked at already) so I would really appreciate some advice on
how to approach it. I'm using a pretty recent master version of libvirt from
git and I'm testing on a ppc64le host with a similar guest but this doesn't
seem to be arch-specific.
If I create a QEMU guest (e.g. via virt-install) that requests both hugepage
backing on the host and NUMA memory placement on the host, the NUMA placement
seems to be ignored. If I do:
# echo 0 > /proc/sys/vm/nr_hugepages
# echo 512 > /sys/devices/system/node/node0/hugepages/hugepages-16384kB/nr_hugepages
# virt-install --name tmp --memory=4096 --graphics none --memorybacking hugepages=yes --disk none --import --wait 0 --numatune=8
... then hugepages are allocated on node 0 and the machine starts successfully,
which seems like a bug.
I believe it should fail to start due to insufficient memory, and in fact that
is what happens if cgroup support isn't detected in the host: there seems to be
a fall-back path in libvirt (probably using mbind()) that works as I would
expect.
Note: the relevant part of the guest XML seems to be this:
        <memoryBacking>
                <hugepages/>
        </memoryBacking>
        <numatune>
                <memory mode='strict' nodeset='8'/>
        </numatune>
It seems fairly clear what is happening: although QEMU is capable of allocating
hugepages on specific NUMA nodes (using "memory-backend-file") libvirt is not
passing those options to QEMU in this situation.
I investigated this line of reasoning and if I hack libvirt to pass those
options to QEMU it does indeed fix the problem... but it renders the machine
state migration-incompatible with unfixed versions. This seems to have been why
this hasn't been fixed already :-(
So what can we do?
I assume it's not acceptable to just break migration with a bugfix, and I can
only think of two ways to fix migration:
(a) Add a new flag to the XML, and for guests without the flag, maintain the
old buggy behaviour (and therefore migration compatibility).
(b) Hack QEMU so that migration can succeed between un-fixed and fixed
versions. (And possibly also in the reverse direction?)
I don't like (a) because it's visible in the XML, and would have to be carried
forever (or at least a long time?).
I don't really like (b) either because it's tricky, and even if it could be
made to work reliably, it would add mess and risk to the migration code. I'm
not sure how the QEMU community would feel about it either. However, I did hack
up some code and it worked at least in some simple cases.
Can anyone see a better approach? Is anyone already working on this?
Thanks,
Sam.
8 years, 1 month
[libvirt] [[PATCH v2] 0/4] try harder to get dest qemu errors on migration
by Nikolay Shirokovskiy
Hi, all.
In case migration fails because the destination qemu exits unexpectedly, the user
receives the qemu log in the error message. Unfortunately the log is truncated and
the most interesting part is missing (below is an example of such a log [1]).
Actually for the most cases the first patch will be enough to fix the issue.
Originally I thought the problem is qemu logging and reading the log are not in
sync (which is true) so I tried to fix it as well in the next patches.
* diff from v1:
1. split changes to libvirtd and virtlogd to different patches
2. split virtlogd patch further
3. simplify handling eofs and hangups in draining function
[1] log example:
CPU Reset (CPU 0)
EAX=00000000 EBX=00000000 ECX=00000000 EDX=00000000
ESI=00000000 EDI=00000000 EBP=00000000 ESP=00000000
EIP=00000000 EFL=00000000 [-------] CPL=0 II=0 A20=0 SMM=0 HLT=0
ES =0000 00000000 00000000 00000000
CS =0000 00000000 00000000 00000000
SS =0000 00000000 00000000 00000000
DS =0000 00000000 00000000 00000000
FS =0000 00000000 00000000 00000000
GS =0000 00000000 00000000 00000000
LDT=0000 00000000 00000000 00000000
TR =0000 00000000 00000000 00000000
GDT= 00000000 00000000
IDT= 00000000 00000000
CR0=00000000 CR2=00000000 CR3=00000000 CR4=00000000
DR0=0000000000000000 DR1=0000000000000000 DR2=0000000000000000 DR3=0000000000000000
DR6=0000000000000000 DR7=0000000000000000
CCS=00000000 CCD=00000000 CCO=DYNAMIC
EFER=0000000000000000
FCW=0000 FSW=0000 [ST=0] FTW=ff MXCSR=00000000
FPR0=0000000000000000 0000 FPR1=0000000000000000 0000
FPR2=0000000000000000 0000 FPR3=0000000000000000 0000
FPR4=0000000000000000 0000 FPR5=0000000000000000 0000
FPR6=0000000000000000 0000 FPR7=0000000000000000 0000
XMM00=00000000000000000000000000000000 XMM01=00000000000000000000000000000000
XMM02=00000000000000000000000000000000 XMM03=00000000000000000000000000000000
XMM04=00000000000000000000000000000000 XMM05=00000000000000000000000000000000
XMM06=00000000000000000000000000000000 XMM07=00000000000000000000000000000000
CPU Reset (CPU 1)
EAX=00000000 EBX=00000000 ECX=00000000 EDX=000206a1
ESI=00000000 EDI=00000000 EBP=00000000 ESP=00000000
EIP=0000fff0 EFL=00000002 [-------] CPL=0 II=0 A20=1 SMM=0 HLT=0
ES =0000 00000000 0000ffff 00009300
CS =f000 ffff0000 0000ffff 00009b00
SS =0000 00000000 0000ffff 00009300
DS =0000 00000000 0000ffff 00009300
FS =0000 00000000 0000ffff 00009300
GS =0000 00000000 0000ffff 00009300
LDT=0000 00000000 0000ffff 00008200
TR =0000 00000000 0000ffff 00008b00
GDT= 00000000 0000ffff
IDT= 00000000 0000ffff
CR0=60000010 CR2=00000000 CR3=00000000 CR4=00000000
DR0=0000000000000000 DR1=0000000000000000 DR2=0000000000000000 DR3=0000000000000000
DR6=00000000ffff0ff0 DR7=0000000000000400
CCS=00000000 CCD=00000000 CCO=DYNAMIC
EFER=0000000000000000
FCW=037f FSW=0000 [ST=0] FTW=00 MXCSR=00001f80
FPR0=0000000000000000 0000 FPR1=0000000000000000 0000
FPR2=0000000000000000 0000 FPR3=0000000000000000 0000
FPR4=0000000000000000 0000 FPR5=0000000000000000 0000
FPR6=0000000000000000 0000 FPR7=0000000000000000 0000
XMM00=00000000000000000000000000000000 XMM01=00000000000000000000000000000000
XMM02=00000000000000000000000000000000 XMM03=00000000000000000000000000000000
XMM04=00000000000000000000000000000000 XMM05=00000000000000000000000000000000
XMM06=00000000000000000000000000000000 XMM07=000
qemu: terminating on signal 15 from pid 168133
Nikolay Shirokovskiy (4):
util: remove 1k limit for error messages
virtlogd: stop reading on EOF instead of hangup
virtlogd: add flag to wait for log end on read
qemu: if virtlogd is used then read log tail correctly
src/logging/log_handler.c | 46 ++++++++++++++++++++++++++++++++++++++++------
src/logging/log_protocol.x | 5 +++++
src/qemu/qemu_domain.c | 7 ++++++-
src/qemu/qemu_domain.h | 1 +
src/qemu/qemu_process.c | 2 ++
src/util/virerror.c | 9 ++++-----
6 files changed, 58 insertions(+), 12 deletions(-)
--
1.8.3.1
8 years, 1 month