[libvirt] [PATCH v2] cgroup: Use system reported "unlimited" value for comparison
by Viktor Mihajlovski
With kernel 3.18 (since commit 3e32cb2e0a12b6915056ff04601cf1bb9b44f967)
the "unlimited" value for cgroup memory limits has changed once again as
its byte value is now computed from a page counter.
The new "unlimited" value reported by the cgroup fs is therefore 2**51-1
pages which is (VIR_DOMAIN_MEMORY_PARAM_UNLIMITED - 3072). This results
e.g. in virsh memtune displaying 9007199254740988 instead of unlimited
for the limits.
This patch uses the value of memory.limit_in_bytes from the cgroup
memory root which is the system's "real" unlimited value for comparison.
See also libvirt commit 231656bbeb9e4d3bedc44362784c35eee21cf0f4 for the
history for kernel 3.12 and before.
Signed-off-by: Viktor Mihajlovski <mihajlov(a)linux.vnet.ibm.com>
---
v2:
- removed RFC verbiage from commit message
- per Martin's review comment, cache the cgroup memory.limit_in_bytes
- used the cgroup detection logic proposed by Martin, much nicer now indeed
- other than initially planned, fall back to VIR_DOMAIN_MEMORY_PARAM_UNLIMITED
in case of cgroup read failure, since the usual paranoia fits in nicely with
the "already initialized" check. With the current code structure this will
never be called when the memory controller is not configured or mounted
anyway...
- while at it, replaced the goto cleanups with direct returns as there's
really no cleanup to be done in the GetMemoryxxxLimit functions
src/util/vircgroup.c | 67 ++++++++++++++++++++++++++++++++++++++--------------
1 file changed, 49 insertions(+), 18 deletions(-)
diff --git a/src/util/vircgroup.c b/src/util/vircgroup.c
index f151193..76d9738 100644
--- a/src/util/vircgroup.c
+++ b/src/util/vircgroup.c
@@ -2452,6 +2452,46 @@ virCgroupGetBlkioDeviceWeight(virCgroupPtr group,
}
+/*
+ * Retrieve the "memory.limit_in_bytes" value from the memory controller
+ * root dir. This value cannot be modified by userspace and therefore
+ * is the maximum limit value supported by cgroups on the local system.
+ * Returns this value scaled to KB or falls back to the original
+ * VIR_DOMAIN_MEMORY_PARAM_UNLIMITED. Either way, remember the return
+ * value to avoid unnecessary cgroup filesystem access.
+ */
+static unsigned long long int virCgroupMemoryUnlimitedKB;
+
+static unsigned long long int
+virCgroupGetMemoryUnlimitedKB(void) /* returns the cached "unlimited" memory limit in KB, computing it on first use */
+{
+ virCgroupPtr group; /* NOTE(review): declared without an initializer; see the cleanup label */
+ unsigned long long int mem_unlimited;
+
+ if (virCgroupMemoryUnlimitedKB) { /* a nonzero cache means the value was already computed */
+ return virCgroupMemoryUnlimitedKB;
+ } else {
+ mem_unlimited = VIR_DOMAIN_MEMORY_PARAM_UNLIMITED << 10; /* fallback expressed in bytes; the >> 10 below scales it back */
+
+ if (virCgroupNew(-1, "/", NULL, -1, &group) < 0)
+ goto cleanup; /* keep the fallback value */
+
+ if (!virCgroupHasController(group, VIR_CGROUP_CONTROLLER_MEMORY))
+ goto cleanup; /* keep the fallback value */
+
+ ignore_value(virCgroupGetValueU64(group, /* the read result is deliberately ignored */
+ VIR_CGROUP_CONTROLLER_MEMORY,
+ "memory.limit_in_bytes",
+ &mem_unlimited));
+ }
+
+ cleanup:
+ virCgroupFree(&group); /* NOTE(review): also reached after a failed virCgroupNew(); presumably that call always stores to group -- confirm */
+ virCgroupMemoryUnlimitedKB = mem_unlimited >> 10; /* bytes -> KB, stored in the file-scope static cache */
+ return virCgroupMemoryUnlimitedKB;
+}
+
+
/**
* virCgroupSetMemory:
*
@@ -2534,20 +2574,17 @@ int
virCgroupGetMemoryHardLimit(virCgroupPtr group, unsigned long long *kb)
{
long long unsigned int limit_in_bytes;
- int ret = -1;
if (virCgroupGetValueU64(group,
VIR_CGROUP_CONTROLLER_MEMORY,
"memory.limit_in_bytes", &limit_in_bytes) < 0)
- goto cleanup;
+ return -1;
*kb = limit_in_bytes >> 10;
- if (*kb > VIR_DOMAIN_MEMORY_PARAM_UNLIMITED)
+ if (*kb >= virCgroupGetMemoryUnlimitedKB())
*kb = VIR_DOMAIN_MEMORY_PARAM_UNLIMITED;
- ret = 0;
- cleanup:
- return ret;
+ return 0;
}
@@ -2596,20 +2633,17 @@ int
virCgroupGetMemorySoftLimit(virCgroupPtr group, unsigned long long *kb)
{
long long unsigned int limit_in_bytes;
- int ret = -1;
if (virCgroupGetValueU64(group,
VIR_CGROUP_CONTROLLER_MEMORY,
"memory.soft_limit_in_bytes", &limit_in_bytes) < 0)
- goto cleanup;
+ return -1;
*kb = limit_in_bytes >> 10;
- if (*kb > VIR_DOMAIN_MEMORY_PARAM_UNLIMITED)
+ if (*kb >= virCgroupGetMemoryUnlimitedKB())
*kb = VIR_DOMAIN_MEMORY_PARAM_UNLIMITED;
- ret = 0;
- cleanup:
- return ret;
+ return 0;
}
@@ -2658,20 +2692,17 @@ int
virCgroupGetMemSwapHardLimit(virCgroupPtr group, unsigned long long *kb)
{
long long unsigned int limit_in_bytes;
- int ret = -1;
if (virCgroupGetValueU64(group,
VIR_CGROUP_CONTROLLER_MEMORY,
"memory.memsw.limit_in_bytes", &limit_in_bytes) < 0)
- goto cleanup;
+ return -1;
*kb = limit_in_bytes >> 10;
- if (*kb > VIR_DOMAIN_MEMORY_PARAM_UNLIMITED)
+ if (*kb >= virCgroupGetMemoryUnlimitedKB())
*kb = VIR_DOMAIN_MEMORY_PARAM_UNLIMITED;
- ret = 0;
- cleanup:
- return ret;
+ return 0;
}
--
1.9.1
7 years, 9 months
[libvirt] [PATCH v2 0/4] qemu: assign vfio devices to PCIe addresses when appropriate
by Laine Stump
Patches 1 and 4 were originally added to the end of the "more PCIe
less legacy PCI" patchset in its final incarnation, but all the other
patches were ACKed and pushed, while this needed a bit more work,
resulting in this "faux V2" - although it's the 2nd time I've posted
these patches, their "V1" was really inside the "V11666" of a larger
series.
In order to implement Alex's suggestion of checking the length of a
PCI device's config file to determine if it's an Express device, I
first made a utility function to return the length of a file, and then
another to return the name of the config file of a virPCIDevice -
those are the new patches 2 and 3.
I'm open to suggestions about additional checks to put in
virFileLength(), as long as they don't involve opening the file. For
now I've kept it as simple as possible. (Just keep in mind that I'll be away from my keyboard from Wednesday through Sunday this week).
Laine Stump (4):
util: new function virFileLength()
util: new function virPCIDeviceGetConfigPath()
qemu: propagate virQEMUDriver object to
qemuDomainDeviceCalculatePCIConnectFlags
qemu: assign vfio devices to PCIe addresses when appropriate
src/libvirt_private.syms | 2 +
src/qemu/qemu_domain.c | 2 +-
src/qemu/qemu_domain_address.c | 117 ++++++++++++++++++++++++++++++++++++-----
src/qemu/qemu_domain_address.h | 7 ++-
src/qemu/qemu_hotplug.c | 19 +++----
src/qemu/qemu_process.c | 13 +++--
src/util/virfile.c | 13 +++++
src/util/virfile.h | 2 +
src/util/virpci.c | 6 +++
src/util/virpci.h | 1 +
tests/qemuhotplugtest.c | 4 +-
11 files changed, 157 insertions(+), 29 deletions(-)
--
2.9.3
7 years, 9 months
[libvirt] [PATCH] daemon: Install virt-guest-shutdown.target properly
by Michal Privoznik
When trying to install libvirtd from sources I've noticed the
following failure:
/usr/bin/install: cannot stat 'virt-guest-shutdown.target': No such file or directory
Makefile:2792: recipe for target 'install-init-systemd' failed
make[3]: *** [install-init-systemd] Error 1
make[3]: *** Waiting for unfinished jobs....
The problem is that while other files around that location in
Makefile are firstly generated into the builddir and only after
that installed, virt-guest-shutdown.target file is not generated
at all and should be installed from the srcdir.
This was introduced in 01079727.
Signed-off-by: Michal Privoznik <mprivozn(a)redhat.com>
---
Pushed under build-breaker & trivial rules.
daemon/Makefile.am | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/daemon/Makefile.am b/daemon/Makefile.am
index 463db6e..60c7368 100644
--- a/daemon/Makefile.am
+++ b/daemon/Makefile.am
@@ -457,7 +457,7 @@ install-init-systemd: install-sysconfig libvirtd.service
$(MKDIR_P) $(DESTDIR)$(SYSTEMD_UNIT_DIR)
$(INSTALL_DATA) libvirtd.service \
$(DESTDIR)$(SYSTEMD_UNIT_DIR)/libvirtd.service
- $(INSTALL_DATA) virt-guest-shutdown.target \
+ $(INSTALL_DATA) $(srcdir)/virt-guest-shutdown.target \
$(DESTDIR)$(SYSTEMD_UNIT_DIR)/virt-guest-shutdown.target
uninstall-init-systemd: uninstall-sysconfig
--
2.8.4
7 years, 9 months
[libvirt] [PATCH] virt-gnutls.m4: fixed check for gnutls_rnd and gnutls_cipher_encrypt
by Daniel P. Berrange
From: Nikos Mavrogiannopoulos <nmav(a)redhat.com>
---
m4/virt-gnutls.m4 | 3 +++
1 file changed, 3 insertions(+)
I thought this was onlist already, but there was a typo
in To, so neither the original patch nor my ACK made it
to the list. I pushed as a build fix.
diff --git a/m4/virt-gnutls.m4 b/m4/virt-gnutls.m4
index b850828..5bca950 100644
--- a/m4/virt-gnutls.m4
+++ b/m4/virt-gnutls.m4
@@ -54,8 +54,11 @@ AC_DEFUN([LIBVIRT_CHECK_GNUTLS],[
#include <gnutls/gnutls.h>
]])
+ OLD_LIBS="$LIBS"
+ LIBS="$LIBS $GNUTLS_LIBS"
AC_CHECK_FUNC([gnutls_rnd])
AC_CHECK_FUNC([gnutls_cipher_encrypt])
+ LIBS=$OLD_LIBS
fi
])
--
2.9.3
7 years, 9 months
[libvirt] [PATCH] docs: improve release numbering rule for minor numbers
by Boris Fiuczynski
Document that the minor number is reset to 0 for the first release of the year.
Signed-off-by: Boris Fiuczynski <fiuczy(a)linux.vnet.ibm.com>
---
docs/downloads.html.in | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/docs/downloads.html.in b/docs/downloads.html.in
index f34e9a6..1f2ec1d 100644
--- a/docs/downloads.html.in
+++ b/docs/downloads.html.in
@@ -383,7 +383,8 @@
<dd>incremented by 1 for the first release of the year (the
Jan 15th release)</dd>
<dt><code>minor</code></dt>
- <dd>incremented by 1 for each monthly release from git master</dd>
+ <dd>set to 0 with every major increment or incremented by 1
+ for each monthly release from git master</dd>
<dt><code>micro</code></dt>
<dd>always 0 for releases from git master, incremented by 1
for each stable maintenance release</dd>
--
2.5.5
7 years, 9 months
[libvirt] [PATCH] fix parsing security labels from virt-aa-helper
by Christian Ehrhardt
When parsing labels virt-aa-helper no longer passes
VIR_DOMAIN_DEF_PARSE_INACTIVE due to dfbc9a83 that tried to mitigate the
changes of a89f05ba. For those it had to switch from
VIR_DOMAIN_DEF_PARSE_INACTIVE to active since we need the domain id
(ctl->def->id) as it is part of the socket path now which is needed for the
aa profile.
But that turned out to break non apparmor seclabels as well as apparmor
seclabels in xmls without labels.
In those cases due to VIR_DOMAIN_DEF_PARSE_INACTIVE now not set anymore
virSecurityLabelDefParseXML insists on finding labels. Cases:
- non-apparmor seclabel - virt-aa-helper breaks
- apparmor seclabel without labels on a defined domain - virt-aa-helper breaks
This was not spotted due to labels getting dynamically created on definition.
So "define, start, stop" works. But "define, edit (add label), start" does not.
Now turning back on VIR_DOMAIN_DEF_PARSE_INACTIVE would cause the old bug, so
we have to differentiate those cases at a finer granularity. This is done by the new flag
VIR_DOMAIN_DEF_PARSE_SKIP_ACTIVE_LABEL which is like
VIR_DOMAIN_DEF_PARSE_INACTIVE but only for the security labels.
So far only set by virt-aa-helper.
Testcase with virt-aa-helper on xml file:
virt-aa-helper -d -r -p 0 -u libvirt-<uuid> < your-guest.xml
virt-aa-helper: error: could not parse XML
virt-aa-helper: error: could not get VM definition
(That should have printed a valid apparmor profile)
Signed-off-by: Christian Ehrhardt <christian.ehrhardt(a)canonical.com>
---
src/conf/domain_conf.c | 6 ++++--
src/conf/domain_conf.h | 3 +++
src/security/virt-aa-helper.c | 1 +
3 files changed, 8 insertions(+), 2 deletions(-)
diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c
index 03506cb..9eb7883 100644
--- a/src/conf/domain_conf.c
+++ b/src/conf/domain_conf.c
@@ -6626,7 +6626,8 @@ virSecurityLabelDefParseXML(xmlXPathContextPtr ctxt,
* if the 'live' VM XML is requested
*/
if (seclabel->type == VIR_DOMAIN_SECLABEL_STATIC ||
- (!(flags & VIR_DOMAIN_DEF_PARSE_INACTIVE) &&
+ (!(flags & (VIR_DOMAIN_DEF_PARSE_SKIP_ACTIVE_LABEL |
+ VIR_DOMAIN_DEF_PARSE_INACTIVE)) &&
seclabel->type != VIR_DOMAIN_SECLABEL_NONE)) {
p = virXPathStringLimit("string(./label[1])",
VIR_SECURITY_LABEL_BUFLEN-1, ctxt);
@@ -6642,7 +6643,8 @@ virSecurityLabelDefParseXML(xmlXPathContextPtr ctxt,
/* Only parse imagelabel, if requested live XML with relabeling */
if (seclabel->relabel &&
- (!(flags & VIR_DOMAIN_DEF_PARSE_INACTIVE) &&
+ (!(flags & (VIR_DOMAIN_DEF_PARSE_SKIP_ACTIVE_LABEL |
+ VIR_DOMAIN_DEF_PARSE_INACTIVE)) &&
seclabel->type != VIR_DOMAIN_SECLABEL_NONE)) {
p = virXPathStringLimit("string(./imagelabel[1])",
VIR_SECURITY_LABEL_BUFLEN-1, ctxt);
diff --git a/src/conf/domain_conf.h b/src/conf/domain_conf.h
index 24aa79c..90693c6 100644
--- a/src/conf/domain_conf.h
+++ b/src/conf/domain_conf.h
@@ -2657,6 +2657,9 @@ typedef enum {
VIR_DOMAIN_DEF_PARSE_ABI_UPDATE = 1 << 9,
/* skip definition validation checks meant to be executed on define time only */
VIR_DOMAIN_DEF_PARSE_SKIP_VALIDATE = 1 << 10,
+ /* in regard to security labels, skip parts of the XML that would only be
+ * present in an active libvirt XML. */
+ VIR_DOMAIN_DEF_PARSE_SKIP_ACTIVE_LABEL = 1 << 11,
} virDomainDefParseFlags;
typedef enum {
diff --git a/src/security/virt-aa-helper.c b/src/security/virt-aa-helper.c
index 77eeaff..0ca4c83 100644
--- a/src/security/virt-aa-helper.c
+++ b/src/security/virt-aa-helper.c
@@ -705,6 +705,7 @@ get_definition(vahControl * ctl, const char *xmlStr)
ctl->def = virDomainDefParseString(xmlStr,
ctl->caps, ctl->xmlopt, NULL,
+ VIR_DOMAIN_DEF_PARSE_SKIP_ACTIVE_LABEL |
VIR_DOMAIN_DEF_PARSE_SKIP_VALIDATE);
if (ctl->def == NULL) {
--
2.7.4
7 years, 9 months
[libvirt] [RFC v2 00/20] qmp: Report bus information on 'query-machines'
by Eduardo Habkost
Changes v1 -> v2:
* v1 series subject was:
"qmp: Report supported device types on 'query-machines'"
* Now we return additional bus information: bus ID, bus type,
and the list of accepted device types on each bus
* Now hotplug can be covered because accepted-device-types can
be set by individual bus instances if necessary
* Now the new field is optional, and every machine having the
new field is now validated in "strict mode" on test code
(meaning all buses must be present on the list)
* TODO: only PC was converted, machines from other
architectures don't include the field yet
* Legacy-PCI vs PCIe is now handled:
* PCIDeviceClass::is_express field was removed
* Defined INTERFACE_LEGACY_PCI_DEVICE and INTERFACE_PCIE_DEVICE,
buses and devices now report appropriate interface names
* Now PCIe buses can return the right information
because each bus instance can set its own
accepted-device-types list
* TODO: PCIe pci-bridge classes need to be changed to not include
INTERFACE_LEGACY_PCI_DEVICE
* TODO: replace q35 hack with appropriate code that set
Legacy-PCI rules in PCI code
* Removed patches from v1:
* pc: Initialize default bus lists
* s390x: Initialize default bus lists
* arm: Initialize default bus lists
* mips: Initialize default bus lists
* ppc: Initialize default bus lists
* qdev: Add device_class_set_bus_type() function
(validation is more difficult with the PCI/PCIe rules, plan
is to try to remove DeviceClass::bus_type field, see
"Limitations" below)
* See individual patches for additional details
The Problem
===========
Currently management software has no way to find out which device
types can be plugged in a machine, unless the machine is already
initialized.
Even after the machine is initialized, there's no way to map
existing bus types to supported device types unless management
software hardcodes the mapping between bus types and device
types.
Example: floppy support on q35 vs i440fx
----------------------------------------
There's no way for libvirt to find out that there's no floppy
controller on pc-q35-* machine-types by default.
With this series, pc-i440fx-* will report "floppy" as a supported
device type, but pc-q35-* will not.
Example: Legacy PCI vs PCIe devices
-----------------------------------
Some devices require a PCIe bus to be available, others work on
both legacy PCI and PCIe, while others work only on a legacy PCI
bus.
Currently management software has no way to know which devices
can be added to a given machine, unless it hardcodes machine-type
names and device-types names.
Example: spapr and PCIe root bus
--------------------------------
See the thread at:
Subject: [RFC PATCH qemu] spapr_pci: Create PCI-express root bus by default
If we make new spapr machine-type versions create a PCIe root
bus, management software will need a way to find out:
1) The type of the default bus for the machine type;
2) The ID of the default bus for the machine type.
Otherwise, management software will have to hardcode it based on
machine-type version. The proposed interface should solve this
problem.
The Proposed Interface
======================
Bus info on query-machines
--------------------------
This series adds a new field to the output of 'query-machines':
'always-available-buses'. It will contain a list of
MachineBusInfo structs. MachineBusInfo will contain:
* bus-id: The bus ID
* bus-type: The bus type
* accepted-device-types: A list of accepted device types for the bus.
bus-id can be used to find out what's the right "bus" argument to
be used when adding a device to the machine configuration (e.g.
when using -device and device_add).
accepted-device-types can be used as the 'implements' argument on
the 'qom-list-types' command, to find out which device types can
be plugged on the bus.
accepted-device-types property on bus objects
---------------------------------------------
This series also adds a 'accepted-device-types' property to bus
objects, so management software can check which kinds of devices
can be plugged at runtime.
Example output
--------------
TODO: update it.
Considered alternatives
=======================
Indirect mapping (machine => bus => device)
-------------------------------------------
This RFC implements a mechanism to implement a
machine-type => accepted-device-types
mapping. An alternative solution I considered was to expose an
indirect mapping:
machine-type => default-bus-types
followed by
bus-type => accepted-device-types.
But some buses have no correct bus-type => accepted-device-types
mapping. PCIe buses, for example, may or may not accept legacy
PCI buses, depending on the machine and the bus topology.
The direct mapping imposes fewer restrictions on how the device and bus type
hierarchy is implemented inside QEMU. There's still a
machine-type => bus-type => device-type
mapping implemented internally, but it is an implementation
detail on the current version, and not part of the
externally-visible interface.
The Implementation
==================
This adds a new field to MachineClass: always_available_buses, a
new field to BusState: accepted_device_types, and a new field to
BusClass: device_type.
On most cases, the normal QOM type hierarchy is used to define
the set of accepted devices for a bus. On the case of PCI buses,
INTERFACE_PCIE_DEVICE and INTERFACE_LEGACY_PCI_DEVICE interface
names were introduced, to indicate the set of devices accepted by
PCI buses.
That means we are duplicating information:
MachineClass::always_available_buses duplicates knowledge that is
already encoded in the machine init function.
To make sure the information is correct, a qmp-machine-info.py
test case is added, that will validate the supported-device-types
field based on the buses created by the machine.
Test Code
---------
qdev-based test code for the new field was implemented in a
Python script. Some extra support was implemented in
tests/Makefile.include, scripts/qemu.py and scripts/qtest.py to
allow the test to be implemented.
Limitations
===========
TODO: "slots" vs "buses"
------------------------
The current interface is returning information about "available
buses". Future versions might return more detailed information
about "slots" where devices can be plugged. This would make the
proposed mechanism more flexible, and cover cases like CPU
hotplug (in other words, the new interface could replace
query-hotpluggable-cpus in a generic way).
"default defaults" vs "-nodefault defaults"
-------------------------------------------
libvirt uses -nodefaults when starting QEMU, so knowing which
buses are available when using -nodefaults is more interesting
for them.
Other software, on the other hand, might be interested in the
results without -nodefaults.
The new 'always-available-buses' field won't cover the more general
cases, where "-nodefaults" is not used, or when there are extra
QEMU options that create more buses (e.g. "-machine usb=on", or
explicit -device options that would create additional buses).
Additional mechanisms may be added to let management software
ask which buses are created when extra options are used, but
maybe those cases would be more easily solved if management
software checks the bus list at runtime (after the machine was
actually created).
TYPE_SYS_BUS_DEVICE is too generic
----------------------------------
Currently all machines have a TYPE_SYS_BUS bus, and all
TYPE_SYS_BUS_DEVICE subclasses are reported as supported by that
bus.
On the other hand, omitting sysbus would make query-machines lie
about supporting sysbus devices, and would require management
software to add special cases for sysbus devices. For this
reason, sysbus is being included in the output of query-machines,
although its accepted-device-types info is less useful than the
information about other buses.
Future versions of QEMU can keep the same interface, but make it
include more useful information, e.g., return more specific bus
info, with type names that exclude sysbus devices that can't
possibly work with the machine.
PCI vs PCIe
-----------
This was a limitation on v1, but I believe the problem was
addressed on v2.
This series defines two new interface names:
INTERFACE_LEGACY_PCI_DEVICE and INTERFACE_PCIE_DEVICE.
* Legacy PCI devices will implement INTERFACE_LEGACY_PCI_DEVICE.
* PCIe devices will implement INTERFACE_PCIE_DEVICE.
* Hybrid devices that support both will implement both
  interfaces.
Buses will have accepted-device-types set accordingly:
* Legacy PCI buses will report INTERFACE_LEGACY_PCI_DEVICE on
accepted-device-types.
* PCIe buses that accept only PCIe devices will report
INTERFACE_PCIE_DEVICE on accepted-device-types.
* PCIe buses that accept both PCIe and legacy PCI devices will
report both INTERFACE_PCIE_DEVICE and
INTERFACE_LEGACY_PCI_DEVICE on accepted-device-types.
The Q35 root bus is the only existing example of a hybrid bus.
See the next section about the hack where this is implemented.
Q35 root bus hack
-----------------
This series includes a hack on Q35 code to add
INTERFACE_LEGACY_PCI_DEVICE to accepted-device-types on the root
PCIe bus. Suggestions on where this information should be encoded
are welcome.
Only PC is covered
------------------
This series sets always-available-buses only on the PC
machine-types, for now. Future versions might have the field added
to other machines. sPAPR is an obvious candidate, as there are
plans to change new spapr machine-type versions to use PCIe as
the root bus.
Out of scope: Configurable buses
--------------------------------
There's no way to map machine options like "usb=on|off" to
device-types or buses. This is not on the scope of this series.
Additional mechanisms may be added to let management software
ask which buses are created when extra options are used, but
maybe those cases would be more easily solved if management
software checks the bus list at runtime (after the machine was
actually created).
Existing DeviceClass::bus_type field
------------------------------------
The new BusClass::device_type and BusState::accepted_device_types
fields duplicate knowledge that is already encoded in
DeviceClass::bus_type.
v1 of this series included code that validated both fields to
ensure they are consistent with each other. But the PCI/PCIe
hierarchy makes validation more difficult, so the validation code
was removed on v2.
The DeviceClass::bus_type abstraction breaks in the case of
hybrid PCI buses, because only a bus type name is not enough
information to find the right bus to plug a device. This needs to
be addressed somehow (maybe DeviceClass::bus_type can be
completely removed), but I didn't look at the device_add code to
find out the best way to do that.
---
Cc: libvir-list(a)redhat.com
Cc: Laine Stump <laine(a)redhat.com>
Cc: Alexey Kardashevskiy <aik(a)ozlabs.ru>
Cc: Greg Kurz <groug(a)kaod.org>
Cc: David Gibson <david(a)gibson.dropbear.id.au>
Cc: Andrea Bolognani <abologna(a)redhat.com>,
Cc: Eric Blake <eblake(a)redhat.com>
Cc: Cornelia Huck <cornelia.huck(a)de.ibm.com>
Cc: David Hildenbrand <david(a)redhat.com>
Eduardo Habkost (20):
qemu.py: Make logging optional
qtest.py: Support QTEST_LOG environment variable
qtest.py: make logging optional
qtest.py: Make 'binary' parameter optional
tests: Add rules to non-gtester qtest test cases
qdev: Add 'accepted-device-types' property to BusClass
qmp: Add 'always-available-buses' field to 'query-machines'
virtio-pci: Set PCIDeviceClass::is_express=1
vmxnet3: Set PCIDeviceClass::is_express=1
pvscsi: Set PCIDeviceClass::is_express=1
pci: INTERFACE_LEGACY_PCI_DEVICE and INTERFACE_PCIE_DEVICE interfaces
pci: Replace is_express with INTERFACE_PCIE_DEVICE
[trivial] edu: Move edu_info outside function
[automated] Add INTERFACE_LEGACY_PCI_DEVICE to all PCI device
subclasses
eepro100: Add INTERFACE_LEGACY_PCI_DEVICE
[incomplete] remove INTERFACE_LEGACY_PCI_DEVICE from PCIe-only devices
pci: Set device_type on bus classes
q35: Hack to make root bus accept legacy PCI devices
pci: validate interfaces on base_class_init
pc: Initialize default bus lists
hw/acpi/piix4.c | 1 +
hw/audio/ac97.c | 4 +
hw/audio/es1370.c | 4 +
hw/audio/intel-hda.c | 11 +++
hw/block/fdc.c | 15 +++-
hw/block/nvme.c | 5 +-
hw/char/serial-pci.c | 12 +++
hw/char/virtio-serial-bus.c | 1 +
hw/core/bus.c | 21 +++++
hw/core/machine.c | 33 ++++++-
hw/core/sysbus.c | 1 +
hw/display/cirrus_vga.c | 4 +
hw/display/qxl.c | 4 +
hw/display/vga-pci.c | 4 +
hw/display/vmware_vga.c | 4 +
hw/i2c/core.c | 7 ++
hw/i2c/smbus_ich9.c | 4 +
hw/i386/amd_iommu.c | 4 +
hw/i386/kvm/pci-assign.c | 4 +
hw/i386/pc.c | 1 +
hw/i386/pc_piix.c | 29 ++++++
hw/i386/pc_q35.c | 21 +++++
hw/i386/xen/xen_platform.c | 4 +
hw/i386/xen/xen_pvdevice.c | 4 +
hw/ide/ich.c | 4 +
hw/ide/pci.c | 4 +
hw/ide/qdev.c | 1 +
hw/input/adb.c | 7 ++
hw/ipack/ipack.c | 7 ++
hw/ipack/tpci200.c | 4 +
hw/isa/i82378.c | 4 +
hw/isa/isa-bus.c | 1 +
hw/isa/lpc_ich9.c | 1 +
hw/isa/piix4.c | 4 +
hw/isa/vt82c686.c | 16 ++++
hw/mips/gt64xxx_pci.c | 4 +
hw/misc/auxbus.c | 1 +
hw/misc/edu.c | 20 +++--
hw/misc/ivshmem.c | 4 +
hw/misc/macio/macio.c | 4 +
hw/misc/pci-testdev.c | 4 +
hw/net/e1000.c | 4 +
hw/net/e1000e.c | 5 +-
hw/net/eepro100.c | 5 ++
hw/net/ne2000.c | 4 +
hw/net/pcnet-pci.c | 4 +
hw/net/rocker/rocker.c | 4 +
hw/net/rtl8139.c | 4 +
hw/net/vmxnet3.c | 36 ++------
hw/pci-bridge/dec.c | 4 +
hw/pci-bridge/ioh3420.c | 6 +-
hw/pci-bridge/pci_expander_bridge.c | 8 ++
hw/pci-bridge/xio3130_downstream.c | 6 +-
hw/pci-bridge/xio3130_upstream.c | 6 +-
hw/pci-host/apb.c | 4 +
hw/pci-host/bonito.c | 4 +
hw/pci-host/gpex.c | 4 +
hw/pci-host/grackle.c | 4 +
hw/pci-host/piix.c | 8 ++
hw/pci-host/ppce500.c | 4 +
hw/pci-host/prep.c | 4 +
hw/pci-host/q35.c | 14 +++
hw/pci-host/uninorth.c | 16 ++++
hw/pci-host/versatile.c | 4 +
hw/pci/pci.c | 41 ++++++++-
hw/pci/pci_bridge.c | 4 +
hw/ppc/ppc4xx_pci.c | 4 +
hw/ppc/spapr_vio.c | 1 +
hw/s390x/css-bridge.c | 2 +
hw/s390x/event-facility.c | 1 +
hw/s390x/s390-pci-bus.c | 7 ++
hw/scsi/esp-pci.c | 4 +
hw/scsi/lsi53c895a.c | 4 +
hw/scsi/megasas.c | 12 ++-
hw/scsi/mptsas.c | 4 +
hw/scsi/scsi-bus.c | 1 +
hw/scsi/vmw_pvscsi.c | 29 ++----
hw/sd/core.c | 7 ++
hw/sd/sdhci.c | 4 +
hw/sh4/sh_pci.c | 4 +
hw/sparc64/sun4u.c | 4 +
hw/ssi/ssi.c | 7 ++
hw/usb/bus.c | 1 +
hw/usb/dev-smartcard-reader.c | 7 ++
hw/usb/hcd-ehci-pci.c | 4 +
hw/usb/hcd-ohci.c | 4 +
hw/usb/hcd-uhci.c | 4 +
hw/usb/hcd-xhci.c | 5 +-
hw/vfio/pci-quirks.c | 4 +
hw/vfio/pci.c | 6 +-
hw/virtio/virtio-bus.c | 1 +
hw/virtio/virtio-pci.c | 27 +++---
hw/watchdog/wdt_i6300esb.c | 4 +
hw/xen/xen_pt.c | 4 +
include/hw/boards.h | 7 ++
include/hw/pci/pci.h | 9 +-
include/hw/qdev-core.h | 8 ++
qapi-schema.json | 37 +++++++-
scripts/qemu.py | 25 ++++--
scripts/qtest.py | 15 +++-
tests/Makefile.include | 39 +++++++-
tests/qmp-machine-info.py | 173 ++++++++++++++++++++++++++++++++++++
vl.c | 6 ++
103 files changed, 882 insertions(+), 102 deletions(-)
create mode 100755 tests/qmp-machine-info.py
--
2.7.4
7 years, 9 months
[libvirt] [PATCH 0/3] Better documentation of migration APIs and flags
by Jiri Denemark
We have 6 public APIs for migration: virDomainMigrate,
virDomainMigrate2, virDomainMigrate3, virDomainMigrateToURI,
virDomainMigrateToURI2, and virDomainMigrateToURI3. Each of them was
documented separately, but the documentation was not consistent. Some
APIs documented individual flags, some didn't do that. Some notes about
specific behavior were not added to all APIs. And so on. Since the newer
versions provide a superset of the functionality of the older ones
mainly adding new parameters, it doesn't really make sense to keep 6
(ideally) almost identical copies of the same documentation.
To get out of this mess this patch set moves documentation of individual
migration flags directly to libvirt-domain.h, where the flags are
defined. And the documentation of virDomainMigrate{,ToURI}{,2} is
reduced to point to the *3 APIs with a description of how old parameters
are mapped to the new ones.
Jiri Denemark (3):
Enhance documentation of virDomainMigrateFlags
Consolidate documentation of virDomainMigrate{,ToURI}{,2,3}
docs: Update news
docs/news.html.in | 2 +
include/libvirt/libvirt-domain.h | 153 ++++++++++++++---
src/libvirt-domain.c | 348 ++++-----------------------------------
3 files changed, 170 insertions(+), 333 deletions(-)
--
2.11.0.rc2
7 years, 9 months
[libvirt] [RFC PATCH 1/1] gluster: cache glfs connection object per volume
by Prasanna Kumar Kalever
This patch offers:
1. Optimizing the calls to glfs_init() and friends
2. Temporarily reducing the memory leak that shows up in the libvirt process,
even though the root cause of the leak is glfs_fini() (7 - 10MB per object)
[Hopefully gluster will address this in a future release, though not very soon]
Currently, a start of a VM will call 2 glfs_new/glfs_init (which will create
glfs object, once for stat, read headers and next to chown) and then will fork
qemu process which will call once again (for actual read write IO).
That is not all: if we have 4 extra attached disks, then the total
calls to glfs_init() and friends will be (4+1)*2 in libvirt and (4+1)*1 in
qemu space, i.e. 15 calls. Since we don't have control over the qemu process, as it
executes in a different process environment, let's not bother much about it.
This patch shrinks these 10 calls (i.e objects from above example) to just
one, by maintaining a cache of glfs objects.
The glfs object is shared with others only if the volume name and all the
volfile servers match. On a cache hit the glfs object takes a ref, and
the unref happens only on close.
Thanks to 'Peter Krempa' for the discussion.
Signed-off-by: Prasanna Kumar Kalever <prasanna.kalever(a)redhat.com>
---
WORK IN PROGRESS: (WIP)
----------------
While initially caching the glfs object, i.e. in
virStorageBackendGlusterSetPreopened(), I have taken ref=2, so on the following
virStorageBackendGlusterClosePreopened() --ref will make ref=1, which
avoids cleaning up the object that has to be shared with the upcoming disks
(if the conditions are met).
To give some context, the idea is that on time-out (or after all disks are
initialized) someone should call virStorageBackendGlusterClosePreopened(),
which will ideally make ref=0, hence the cached object will be cleaned up/deleted
by calling glfs_fini().
I had thought of doing the time-out cleanup call in
virSecurityManagerSetAllLabel() or similar, but that looks too odd to me.
Any suggestions?
Thanks in advance.
---
src/storage/storage_backend_gluster.c | 136 ++++++++++++++++++++++++++++++++--
src/storage/storage_backend_gluster.h | 33 ++++++++-
2 files changed, 160 insertions(+), 9 deletions(-)
diff --git a/src/storage/storage_backend_gluster.c b/src/storage/storage_backend_gluster.c
index 8e86704..4f53ebc 100644
--- a/src/storage/storage_backend_gluster.c
+++ b/src/storage/storage_backend_gluster.c
@@ -47,19 +47,132 @@ struct _virStorageBackendGlusterState {
char *dir; /* dir from URI, or "/"; always starts and ends in '/' */
};
+virGlusterDefPtr ConnCache = {0,};
+
typedef struct _virStorageBackendGlusterState virStorageBackendGlusterState;
typedef virStorageBackendGlusterState *virStorageBackendGlusterStatePtr;
+void
+virStorageBackendGlusterSetPreopened(virStorageSourcePtr src, glfs_t *fs)
+{
+ size_t i;
+ virStorageBackendGlusterStatePtrPreopened entry = NULL;
+
+ if (ConnCache == NULL && (VIR_ALLOC(ConnCache) < 0))
+ return;
+
+ for (i = 0; i < ConnCache->nConn; i++) {
+ if (STREQ(ConnCache->Conn[i]->volname, src->volume))
+ return;
+ }
+
+ if (VIR_ALLOC(entry) < 0)
+ goto L1;
+
+ if (VIR_STRDUP(entry->volname, src->volume) < 0)
+ goto L1;
+
+ entry->nhosts = src->nhosts;
+ for (i = 0; i < src->nhosts; i++) {
+ if (VIR_ALLOC_N(entry->hosts[i], strlen(src->hosts[i].name) + 1) < 0)
+ goto L2;
+ strcpy(entry->hosts[i], src->hosts[i].name);
+ }
+
+ entry->fs = fs;
+ entry->ref = 2; /* persist glfs obj per volume until a final timeout
+ virStorageBackendGlusterClosePreopened() is called */
+
+ if (VIR_INSERT_ELEMENT(ConnCache->Conn, -1, ConnCache->nConn, entry) < 0)
+ goto L2;
+
+ return;
+
+L2:
+ for (i = 0; i < entry->nhosts; i++)
+ VIR_FREE(entry->hosts[i]);
+L1:
+ if(ConnCache->nConn == 0)
+ VIR_FREE(ConnCache);
+ VIR_FREE(entry->volname);
+ VIR_FREE(entry);
+}
+
+glfs_t *
+virStorageBackendGlusterFindPreopened(virStorageSourcePtr src)
+{
+ size_t i, j, k, ret = 0;
+ size_t min, max;
+
+ if (ConnCache == NULL)
+ return NULL;
+
+ virStorageBackendGlusterStatePtrPreopened entry;
+
+ for (i = 0; i < ConnCache->nConn; i++) {
+ entry = ConnCache->Conn[i];
+ if (STREQ(entry->volname, src->volume)) {
+ min = entry->nhosts < src->nhosts ? entry->nhosts : src->nhosts;
+ max = entry->nhosts >= src->nhosts ? entry->nhosts : src->nhosts;
+ for (j = 0; j< min; j++) {
+ if (entry->nhosts == min) {
+ for (k = 0; k < max; k++) {
+ if (STREQ(entry->hosts[j], src->hosts[k].name)) {
+ ret = 1;
+ break;
+ }
+ }
+ if (!ret)
+ return NULL;
+ } else {
+ for (k = 0; k < max; k++) {
+ if (STREQ(src->hosts[j].name, entry->hosts[k])) {
+ ret = 1;
+ break;
+ }
+ }
+ if (!ret)
+ return NULL;
+ }
+ }
+ entry->ref++;
+ return entry->fs;
+ }
+ }
+ return NULL;
+}
+
+int
+virStorageBackendGlusterClosePreopened(glfs_t *fs)
+{
+ size_t i;
+ int ret = 0;
+
+ if (fs == NULL)
+ return ret;
+
+ for (i = 0; i < ConnCache->nConn; i++) {
+ if (ConnCache->Conn[i]->fs == fs) {
+ if (--ConnCache->Conn[i]->ref)
+ return ret;
+
+ ret = glfs_fini(ConnCache->Conn[i]->fs);
+ VIR_FREE(ConnCache->Conn[i]->volname);
+ VIR_FREE(ConnCache->Conn[i]);
+
+ VIR_DELETE_ELEMENT(ConnCache->Conn, i, ConnCache->nConn);
+ }
+ }
+ return ret;
+}
+
static void
virStorageBackendGlusterClose(virStorageBackendGlusterStatePtr state)
{
if (!state)
return;
- /* Yuck - glusterfs-api-3.4.1 appears to always return -1 for
- * glfs_fini, with errno containing random data, so there's no way
- * to tell if it succeeded. 3.4.2 is supposed to fix this.*/
- if (state->vol && glfs_fini(state->vol) < 0)
+ if (state->vol && virStorageBackendGlusterClosePreopened(state->vol) < 0)
VIR_DEBUG("shutdown of gluster volume %s failed with errno %d",
state->volname, errno);
@@ -556,8 +669,7 @@ virStorageFileBackendGlusterDeinit(virStorageSourcePtr src)
src, src->hosts->name, src->hosts->port ? src->hosts->port : "0",
src->volume, src->path);
- if (priv->vol)
- glfs_fini(priv->vol);
+ virStorageBackendGlusterClosePreopened(priv->vol);
VIR_FREE(priv->canonpath);
VIR_FREE(priv);
@@ -630,11 +742,20 @@ virStorageFileBackendGlusterInit(virStorageSourcePtr src)
src, priv, src->volume, src->path,
(unsigned int)src->drv->uid, (unsigned int)src->drv->gid);
+
+ priv->vol = virStorageBackendGlusterFindPreopened(src);
+ if (priv->vol) {
+ src->drv->priv = priv;
+ return 0;
+ }
+
if (!(priv->vol = glfs_new(src->volume))) {
virReportOOMError();
goto error;
}
+ virStorageBackendGlusterSetPreopened(src, priv->vol);
+
for (i = 0; i < src->nhosts; i++) {
if (virStorageFileBackendGlusterInitServer(priv, src->hosts + i) < 0)
goto error;
@@ -652,8 +773,7 @@ virStorageFileBackendGlusterInit(virStorageSourcePtr src)
return 0;
error:
- if (priv->vol)
- glfs_fini(priv->vol);
+ virStorageBackendGlusterClosePreopened(priv->vol);
VIR_FREE(priv);
return -1;
diff --git a/src/storage/storage_backend_gluster.h b/src/storage/storage_backend_gluster.h
index 6796016..a0326aa 100644
--- a/src/storage/storage_backend_gluster.h
+++ b/src/storage/storage_backend_gluster.h
@@ -22,9 +22,40 @@
#ifndef __VIR_STORAGE_BACKEND_GLUSTER_H__
# define __VIR_STORAGE_BACKEND_GLUSTER_H__
-# include "storage_backend.h"
+#include "storage_backend.h"
+#include <glusterfs/api/glfs.h>
extern virStorageBackend virStorageBackendGluster;
extern virStorageFileBackend virStorageFileBackendGluster;
+
+struct _virStorageBackendGlusterStatePreopened {
+ char *volname;
+ size_t nhosts;
+ char *hosts[1024]; /* FIXME: 1024 ? */
+ glfs_t *fs;
+ int ref;
+};
+
+typedef struct _virStorageBackendGlusterStatePreopened virStorageBackendGlusterStatePreopened;
+typedef virStorageBackendGlusterStatePreopened *virStorageBackendGlusterStatePtrPreopened;
+
+struct _virGlusterDef {
+ size_t nConn;
+ virStorageBackendGlusterStatePtrPreopened *Conn;
+};
+
+typedef struct _virGlusterDef virGlusterDef;
+typedef virGlusterDef *virGlusterDefPtr;
+
+extern virGlusterDefPtr ConnCache;
+
+void
+virStorageBackendGlusterSetPreopened(virStorageSourcePtr src, glfs_t *fs);
+
+glfs_t*
+virStorageBackendGlusterFindPreopened(virStorageSourcePtr src);
+
+int
+virStorageBackendGlusterClosePreopened(glfs_t *fs);
#endif /* __VIR_STORAGE_BACKEND_GLUSTER_H__ */
--
2.7.4
7 years, 9 months