[PATCH] util: Add phys_port_name support on virPCIGetNetName
by Dmytro Linkin
Current virPCIGetNetName() logic is to get net device name by checking
it's phys_port_id, if caller provide it, or by it's index (eg, by it's
position at sysfs net directory). This approach worked fine up until
linux kernel version 5.8, where NVIDIA Mellanox driver implemented
linking of VFs' representors to PCI device in switchdev mode. This mean
that device's sysfs net directory will hold multiple net devices. Ex.:
$ ls '/sys/bus/pci/devices/0000:82:00.0/net'
ens1f0 eth0 eth1
Most switch devices support phys_port_name instead of phys_port_id, so
virPCIGetNetName() will try to get PF name by it's index - 0. The
problem here is that the PF nedev entry may not be the first.
To fix that, for switch devices, we introduce a new logic to select the
PF uplink netdev according to the content of phys_port_name. Extend
virPCIGetNetName() with physPortNameRegex variable to get proper device
by it's phys_port_name scheme, for ex., "p[0-9]+$" to get PF,
"pf[0-9]+vf[0-9]+$" to get VF or "p1$" to get exact net device. So now
virPCIGetNetName() logic work in following sequence:
- filter by phys_port_id, if it's provided,
or
- filter by phys_port_name, if it's regex provided,
or
- get net device by it's index (position) in sysfs net directory.
Also, make getting content of iface sysfs files more generic.
Signed-off-by: Dmytro Linkin <dlinkin(a)nvidia.com>
Reviewed-by: Adrian Chiris <adrianc(a)nvidia.com>
---
src/hypervisor/virhostdev.c | 2 +-
src/util/virnetdev.c | 74 ++++++++++++++++++++++++++++++++++++---------
src/util/virnetdev.h | 4 +++
src/util/virpci.c | 63 ++++++++++++++++++++++++++++++++++++--
src/util/virpci.h | 6 ++++
5 files changed, 130 insertions(+), 19 deletions(-)
diff --git a/src/hypervisor/virhostdev.c b/src/hypervisor/virhostdev.c
index 69102b8..1f5c347 100644
--- a/src/hypervisor/virhostdev.c
+++ b/src/hypervisor/virhostdev.c
@@ -333,7 +333,7 @@ virHostdevNetDevice(virDomainHostdevDefPtr hostdev,
* type='hostdev'>, and it is only those devices that should
* end up calling this function.
*/
- if (virPCIGetNetName(sysfs_path, 0, NULL, linkdev) < 0)
+ if (virPCIGetNetName(sysfs_path, 0, NULL, NULL, linkdev) < 0)
return -1;
if (!(*linkdev)) {
diff --git a/src/util/virnetdev.c b/src/util/virnetdev.c
index b42fa86..99e3b35 100644
--- a/src/util/virnetdev.c
+++ b/src/util/virnetdev.c
@@ -1112,6 +1112,29 @@ virNetDevGetPCIDevice(const char *devName)
}
+/* A wrapper to get content of file from ifname SYSFS_NET_DIR
+ */
+static int
+virNetDevGetSysfsFileValue(const char *ifname,
+ const char *fileName,
+ char **sysfsFileData)
+{
+ g_autofree char *sysfsFile = NULL;
+
+ *sysfsFileData = NULL;
+
+ if (virNetDevSysfsFile(&sysfsFile, ifname, fileName) < 0)
+ return -1;
+
+ /* a failure to read just means the driver doesn't support
+ * <fileName>, so set success now and ignore the return from
+ * virFileReadAllQuiet().
+ */
+
+ ignore_value(virFileReadAllQuiet(sysfsFile, 1024, sysfsFileData));
+ return 0;
+}
+
/**
* virNetDevGetPhysPortID:
*
@@ -1130,20 +1153,29 @@ int
virNetDevGetPhysPortID(const char *ifname,
char **physPortID)
{
- g_autofree char *physPortIDFile = NULL;
-
- *physPortID = NULL;
-
- if (virNetDevSysfsFile(&physPortIDFile, ifname, "phys_port_id") < 0)
- return -1;
+ return virNetDevGetSysfsFileValue(ifname, "phys_port_id", physPortID);
+}
- /* a failure to read just means the driver doesn't support
- * phys_port_id, so set success now and ignore the return from
- * virFileReadAllQuiet().
- */
- ignore_value(virFileReadAllQuiet(physPortIDFile, 1024, physPortID));
- return 0;
+/**
+ * virNetDevGetPhysPortName:
+ *
+ * @ifname: name of a netdev
+ *
+ * @physPortName: pointer to char* that will receive @ifname's
+ * phys_port_name from sysfs (null terminated
+ * string). Could be NULL if @ifname's net driver doesn't
+ * support phys_port_name (most netdev drivers
+ * don't). Caller is responsible for freeing the string
+ * when finished.
+ *
+ * Returns 0 on success or -1 on failure.
+ */
+int
+virNetDevGetPhysPortName(const char *ifname,
+ char **physPortName)
+{
+ return virNetDevGetSysfsFileValue(ifname, "phys_port_name", physPortName);
}
@@ -1200,7 +1232,7 @@ virNetDevGetVirtualFunctions(const char *pfname,
}
if (virPCIGetNetName(pci_sysfs_device_link, 0,
- pfPhysPortID, &((*vfname)[i])) < 0) {
+ pfPhysPortID, NULL, &((*vfname)[i])) < 0) {
goto cleanup;
}
@@ -1295,7 +1327,8 @@ virNetDevGetPhysicalFunction(const char *ifname, char **pfname)
return -1;
if (virPCIGetNetName(physfn_sysfs_path, 0,
- vfPhysPortID, pfname) < 0) {
+ vfPhysPortID,
+ VIR_PF_PHYS_PORT_NAME_REGEX, pfname) < 0) {
return -1;
}
@@ -1358,7 +1391,7 @@ virNetDevPFGetVF(const char *pfname, int vf, char **vfname)
* isn't bound to a netdev driver, it won't have a netdev name,
* and vfname will be NULL).
*/
- return virPCIGetNetName(virtfnSysfsPath, 0, pfPhysPortID, vfname);
+ return virPCIGetNetName(virtfnSysfsPath, 0, pfPhysPortID, NULL, vfname);
}
@@ -1403,6 +1436,17 @@ virNetDevGetPhysPortID(const char *ifname G_GNUC_UNUSED,
}
int
+virNetDevGetPhysPortName(const char *ifname G_GNUC_UNUSED,
+ char **physPortName)
+{
+ /* this actually should never be called, and is just here to
+ * satisfy the linker.
+ */
+ *physPortName = NULL;
+ return 0;
+}
+
+int
virNetDevGetVirtualFunctions(const char *pfname G_GNUC_UNUSED,
char ***vfname G_GNUC_UNUSED,
virPCIDeviceAddressPtr **virt_fns G_GNUC_UNUSED,
diff --git a/src/util/virnetdev.h b/src/util/virnetdev.h
index 55e3948..712421d 100644
--- a/src/util/virnetdev.h
+++ b/src/util/virnetdev.h
@@ -229,6 +229,10 @@ int virNetDevGetPhysPortID(const char *ifname,
char **physPortID)
ATTRIBUTE_NONNULL(1) ATTRIBUTE_NONNULL(2)
G_GNUC_WARN_UNUSED_RESULT;
+int virNetDevGetPhysPortName(const char *ifname,
+ char **physPortName)
+ ATTRIBUTE_NONNULL(1) ATTRIBUTE_NONNULL(2)
+ G_GNUC_WARN_UNUSED_RESULT;
int virNetDevGetVirtualFunctions(const char *pfname,
char ***vfname,
diff --git a/src/util/virpci.c b/src/util/virpci.c
index 47c671d..18b3f66 100644
--- a/src/util/virpci.c
+++ b/src/util/virpci.c
@@ -2409,8 +2409,10 @@ virPCIDeviceAddressGetSysfsFile(virPCIDeviceAddressPtr addr,
* virPCIGetNetName:
* @device_link_sysfs_path: sysfs path to the PCI device
* @idx: used to choose which netdev when there are several
- * (ignored if physPortID is set)
+ * (ignored if physPortID or physPortNameRegex is set)
* @physPortID: match this string in the netdev's phys_port_id
+ * (or NULL to ignore and use phys_port_name or idx instead)
+ * @physPortNameRegex: match this regex with netdev's phys_port_name
* (or NULL to ignore and use idx instead)
* @netname: used to return the name of the netdev
* (set to NULL (but returns success) if there is no netdev)
@@ -2421,11 +2423,13 @@ int
virPCIGetNetName(const char *device_link_sysfs_path,
size_t idx,
char *physPortID,
+ char *physPortNameRegex,
char **netname)
{
g_autofree char *pcidev_sysfs_net_path = NULL;
g_autofree char *firstEntryName = NULL;
g_autofree char *thisPhysPortID = NULL;
+ g_autofree char *thisPhysPortName = NULL;
int ret = -1;
DIR *dir = NULL;
struct dirent *entry = NULL;
@@ -2466,6 +2470,41 @@ virPCIGetNetName(const char *device_link_sysfs_path,
continue;
}
+ } else if (physPortNameRegex) {
+ /* Most switch devices use phys_port_name instead of
+ * phys_port_id.
+ * NOTE: VFs' representors net devices can be linked to PF's PCI
+ * device, which mean that there'll be multiple net devices
+ * instances and to get a proper net device need to match on
+ * specific regex.
+ * To get PF netdev, for ex., used following regex:
+ * "(p[0-9]+$)|(p[0-9]+s[0-9]+$)"
+ * or to get exact VF's netdev next regex is used:
+ * "pf0vf1$"
+ */
+ if (virNetDevGetPhysPortName(entry->d_name, &thisPhysPortName) < 0)
+ goto cleanup;
+
+ if (thisPhysPortName) {
+ /* if this one doesn't match, keep looking */
+ if (!virStringMatch(thisPhysPortName, physPortNameRegex)) {
+ VIR_FREE(thisPhysPortName);
+ /* Save the first entry we find to use as a failsafe
+ * in case we fail to match on regex.
+ */
+ if (!firstEntryName)
+ firstEntryName = g_strdup(entry->d_name);
+
+ continue;
+ }
+ } else {
+ /* Save the first entry we find to use as a failsafe in case
+ * phys_port_name is not supported.
+ */
+ if (!firstEntryName)
+ firstEntryName = g_strdup(entry->d_name);
+ continue;
+ }
} else {
if (i++ < idx)
continue;
@@ -2494,6 +2533,22 @@ virPCIGetNetName(const char *device_link_sysfs_path,
"phys_port_id '%s' under PCI device at %s"),
physPortID, device_link_sysfs_path);
}
+ } else if (physPortNameRegex) {
+ if (firstEntryName) {
+ /* We didn't match the provided phys_port_name regex, probably
+ * because kernel or NIC driver doesn't support it, so just
+ * return first netname we found.
+ */
+ *netname = firstEntryName;
+ firstEntryName = NULL;
+ ret = 0;
+ } else {
+ virReportError(VIR_ERR_INTERNAL_ERROR,
+ _("Could not find network device with "
+ "phys_port_name matching regex '%s' "
+ "under PCI device at %s"),
+ physPortNameRegex, device_link_sysfs_path);
+ }
} else {
ret = 0; /* no netdev at the given index is *not* an error */
}
@@ -2539,7 +2594,7 @@ virPCIGetVirtualFunctionInfo(const char *vf_sysfs_device_path,
* correct.
*/
if (pfNetDevIdx == -1) {
- if (virPCIGetNetName(vf_sysfs_device_path, 0, NULL, &vfname) < 0)
+ if (virPCIGetNetName(vf_sysfs_device_path, 0, NULL, NULL, &vfname) < 0)
goto cleanup;
if (vfname) {
@@ -2550,7 +2605,8 @@ virPCIGetVirtualFunctionInfo(const char *vf_sysfs_device_path,
}
if (virPCIGetNetName(pf_sysfs_device_path,
- pfNetDevIdx, vfPhysPortID, pfname) < 0) {
+ pfNetDevIdx, vfPhysPortID,
+ VIR_PF_PHYS_PORT_NAME_REGEX, pfname) < 0) {
goto cleanup;
}
@@ -2688,6 +2744,7 @@ int
virPCIGetNetName(const char *device_link_sysfs_path G_GNUC_UNUSED,
size_t idx G_GNUC_UNUSED,
char *physPortID G_GNUC_UNUSED,
+ char *physPortNameScheme G_GNUC_UNUSED,
char **netname G_GNUC_UNUSED)
{
virReportError(VIR_ERR_INTERNAL_ERROR, "%s", _(unsupported));
diff --git a/src/util/virpci.h b/src/util/virpci.h
index b3322ba..6ea0873 100644
--- a/src/util/virpci.h
+++ b/src/util/virpci.h
@@ -55,6 +55,11 @@ struct _virZPCIDeviceAddress {
#define VIR_PCI_DEVICE_ADDRESS_FMT "%04x:%02x:%02x.%d"
+/* Represents format of PF's phys_port_name in switchdev mode:
+ * 'p%u' or 'p%us%u'. New line checked since value is readed from sysfs file.
+ */
+# define VIR_PF_PHYS_PORT_NAME_REGEX ((char *)"(p[0-9]+$)|(p[0-9]+s[0-9]+$)")
+
struct _virPCIDeviceAddress {
unsigned int domain;
unsigned int bus;
@@ -232,6 +237,7 @@ int virPCIDeviceAddressGetSysfsFile(virPCIDeviceAddressPtr addr,
int virPCIGetNetName(const char *device_link_sysfs_path,
size_t idx,
char *physPortID,
+ char *physPortNameRegex,
char **netname);
int virPCIGetSysfsFile(char *virPCIDeviceName,
--
1.8.3.1
4 years, 3 months
[RFCv2 00/46] RFC: Generate parsexml/formatbuf functions based on directives
by Shi Lei
V1 here: [https://www.redhat.com/archives/libvir-list/2020-June/msg00357.html]
Differ from V1:
* Move the generator into scripts/xmlgen and rename it 'xmlgen'.
* Declare virXMLChildNode and virXMLChildNodeSet in libvirt_private.syms.
* Replace VIR_FREE with g_free and VIR_ALLOC[_N] with g_new0.
* Adjust virReportError to avoid unnecessary translation.
* Remove the macro VIR_USED and use G_GNUC_UNUSED to declare arguments.
* When parsing string member, assign value to it directly instead of
using middle variable.
* Don't set libclang_path. Just use python-clang's default setting.
* Use virEscapeString for escaping xml characters.
* Enable directive 'genformat' with a parameter to support separation mode.
* Add directive 'xmlswitch' and 'xmlgroup' to support discriminated unions.
* Allow directive 'array' and 'specified' to carry with a parameter,
which specifies its counterpart explicitly.
* Enable directive 'xmlattr' with path.
* Add directive 'formatflag' and 'formathook'.
For those new and changed directives, illustrate them by an example:
struct _virDomainGraphicsAuthDef { /* genparse, genformat:separate */
char *passwd; /* xmlattr, formatflag:VIR_DOMAIN_DEF_FORMAT_SECURE */
bool expires;
time_t validTo; /* xmlattr:passwdValidTo, specified:expires */
virDomainGraphicsAuthConnectedType connected; /* xmlattr */
};
struct _virDomainGraphicsListenDef { /* genparse:withhook, genformat */
virDomainGraphicsListenType type; /* xmlattr */
char *address; /* xmlattr, formathook */
char *network; /* xmlattr, formatflag:VIR_DOMAIN_GRAPHICS_LISTEN_TYPE_NETWORK */
char *socket; /* xmlattr, formathook */
int fromConfig; /* xmlattr, formatflag:%VIR_DOMAIN_DEF_FORMAT_STATUS */
bool autoGenerated; /* xmlattr, formatflag:%VIR_DOMAIN_DEF_FORMAT_STATUS */
};
struct _virDomainGraphicsSDLDef { /* genparse, genformat:separate */
char *display; /* xmlattr */
char *xauth; /* xmlattr */
bool fullscreen; /* xmlattr */
virTristateBool gl; /* xmlattr:gl/enable */
};
struct _virDomainGraphicsDef { /* genparse:concisehook, genformat */
virObjectPtr privateData;
virDomainGraphicsType type; /* xmlattr */
size_t nListens;
virDomainGraphicsListenDefPtr listens; /* xmlelem, array:nListens */
union {
virDomainGraphicsSDLDef sdl; /* xmlgroup */
virDomainGraphicsVNCDef vnc; /* xmlgroup */
virDomainGraphicsRDPDef rdp; /* xmlgroup */
virDomainGraphicsDesktopDef desktop; /* xmlgroup */
virDomainGraphicsSpiceDef spice; /* xmlgroup */
virDomainGraphicsEGLHeadlessDef egl_headless; /* xmlgroup */
} data; /* xmlswitch:type */
};
Explanation for these directives:
- genformat[:separate|onlyattrs|onlyelems]
Only work on a struct.
Generate formatbuf function for this struct only if 'genformat' is specified.
The function name is based on struct-name and suffixed with 'FormatBuf'.
When 'genformat:separate' is specified, generate two formatbuf functions
rather than a single full-mode formatbuf function.
One for formatting attributes and another for formatting elements.
These function names are based on struct-name and suffixed with 'FormatAttr'
and 'FormatElem' respectively.
The 'onlyattrs' and 'onlyelems' are just like 'separate', but only
generate one of those two functions according to its denotation.
- xmlattr[:[parentname/]thename]
Parse/Format the field as an XML attribute or
attribute wrapped by an XML element.
If only 'thename' is specified, use it as the XML attribute name;
or use the filed name.
The 'parentname' is the name of the attribute's parent element.
If 'parentname/thename' is specified, the corresponding form is
<parentname thename='..' />.
- xmlgroup
The field is a struct, but its corresponding form in XML is a group
rather than an element.
- xmlswitch:thename
Only for discriminated union. 'thename' is the name of its relative enum.
The name of each union member should match a shortname of the enum.
- array[:countername]
Parse/Format the field as an array.
Each array field must have an related counter field, which name is
specified by 'countername'.
If 'countername' is omitted, follow the pattern:
n + 'field_name'.
- specified[:thename]
This field has an related field to indicate its existence, and
'thename' specifies the name of this related field.
When 'thename' is omitted, follow the pattern:
'field_name' + '_specified'.
- formatflag:[!|%]flag
This field will be formatted and written out to XML only if the 'flag'
hits a target flagset.
The target flagset is passed into the formatbuf function through the
argument 'opaque'.
Adding a '!' before 'flag' means NOT hitting.
Adding a '%' before 'flag' means that flag hitting-check is the unique
condition for formatting this field. For example,
for 'passwd' in 'virDomainGraphicsAuthDef', the directive is:
formatflag:VIR_DOMAIN_DEF_FORMAT_SECURE
then the generated code:
if (def->passwd && (virXMLFlag(opaque) & VIR_DOMAIN_DEF_FORMAT_SECURE))
virBufferEscapeString(buf, " passwd='%s'", def->passwd);
If '%' is inserted like this:
formatflag:%VIR_DOMAIN_DEF_FORMAT_SECURE
then the generated code:
if ((virXMLFlag(opaque) & VIR_DOMAIN_DEF_FORMAT_SECURE))
virBufferEscapeString(buf, " passwd='%s'", def->passwd);
- formathook
Introduce hooks to handle the field if xmlgen can't deal with it now.
E.g., virDomainGraphicsListenDef have two fields with 'formathook',
which are 'address' and 'socket'.
The xmlgen will generate the declaration of some hooks for formatting
these fields and developers should implement them.
1) Check the declaration of hook by a commandline.
# ./scripts/xmlgen/go show virDomainGraphicsListenDef -kf
int
virDomainGraphicsListenDefFormatHook(const virDomainGraphicsListenDef *def,
const void *parent,
const void *opaque,
virBufferPtr addressBuf,
virBufferPtr socketBuf);
bool
virDomainGraphicsListenDefCheckHook(const virDomainGraphicsListenDef *def,
const void *parent,
void *opaque,
bool result);
2) Implement these two hooks in src/conf/domain_conf.c.
2.1) virXXXFormatHook
It is the hook for formatting field 'address' and 'socket'.
The 'addressBuf' and 'socketBuf' are used for output destinations respectively.
2.2) virXXXCheckHook
For structs, the xmlgen generates virXXXCheck function to come with
the virXXXFormatBuf. The virXXXCheck reports whether the corresponding
XML element is null.
The virXXXCheckHook intercepts the 'result' of virXXXCheck. It changes 'result'
or just forwards it according to those fields with 'formathook'.
Thanks!
Shi Lei (46):
scripts: Add a tool to generate xml parse/format functions
maint: Check python3-clang
maint: Call xmlgen automatically when c-head-files change
util: Add some xml-helper-functions to cooperate with xmlgen
util: Add helper functions for 'bool' and 'time_t' and cooperate with
xmlgen
util: Add parsexml/formatbuf helper functions for virSocketAddr
conf: Extract error-checking code from virNetworkDNSTxtDefParseXML
conf: Replace virNetworkDNSTxtDefParseXML(hardcoded) with
namesake(generated)
conf: Generate virNetworkDNSTxtDefFormatBuf
conf: Extract error-checking code from virNetworkDNSSrvDefParseXML
conf: Replace virNetworkDNSSrvDefParseXML(hardcoded) with
namesake(generated)
conf: Generate virNetworkDNSSrvDefFormatBuf
conf: Extract error-checking code from virNetworkDNSHostDefParseXML
conf: Replace virNetworkDNSHostDefParseXML(hardcoded) with
namesake(generated)
conf: Generate virNetworkDNSHostDefFormatBuf
conf: Extract virNetworkDNSForwarderParseXML from
virNetworkDNSParseXML
conf: Replace virNetworkDNSForwarderParseXML(hardcoded) with
namesake(generated)
conf: Generate virNetworkDNSForwarderFormatBuf
conf: Extract error-checking code from virNetworkDNSDefParseXML
conf: Replace virNetworkDNSDefParseXML(hardcoded) with
namesake(generated)
conf: Generate virNetworkDNSDefFormatBuf
conf: Extract embedded structs from virDomainGraphicsDef as standalone
structs
conf: Replace virDomainGraphicsDefParseXMLSDL(hardcoded) with
virDomainGraphicsSDLDefParseXML(generated)
conf: Generate format functions for virDomainGraphicsSDLDef
conf: Replace virDomainGraphicsAuthDefParseXML(hardcoded) with
namesake(generated)
conf: Generate format functions for virDomainGraphicsAuthDef
conf: Extract error-checking code from virDomainGraphicsDefParseXMLVNC
conf: Replace virDomainGraphicsDefParseXMLVNC(hardcoded) with
virDomainGraphicsVNCDefParseXML(generated)
conf: Generate virDomainGraphicsVNCDefFormatAttr
conf: Extract error-checking code from virDomainGraphicsDefParseXMLRDP
conf: Replace virDomainGraphicsDefParseXMLRDP(hardcoded) with
virDomainGraphicsRDPDefParseXML(generated)
conf: Generate virDomainGraphicsRDPDefFormatAttr
conf: Replace virDomainGraphicsDefParseXMLDesktop(hardcoded) with
virDomainGraphicsDesktopDefParseXML(generated)
conf: Generate virDomainGraphicsDesktopDefFormatAttr
conf: Add virSpiceChannelDef to help to parse the member 'channels' of
virDomainGraphicsSpiceDef
conf: Extract error-checking code from
virDomainGraphicsDefParseXMLSpice
conf: Replace virDomainGraphicsDefParseXMLSpice(hardcoded) with
virDomainGraphicsSpiceDefParseXML(generated)
conf: Generate virDomainGraphicsSpiceDefFormatElem and
virDomainGraphicsSpiceDefFormatAttr
conf: Replace virDomainGraphicsDefParseXMLEGLHeadless(hardcoded) with
virDomainGraphicsEGLHeadlessDefParseXML(generated)
conf: Generate virDomainGraphicsEGLHeadlessDefFormatElem
conf: Extract error-checking code from
virDomainGraphicsListenDefParseXML
conf: Replace virDomainGraphicsListenDefParseXML(hardcoded) with
namesake(generated)
conf: Generate virDomainGraphicsListenDefFormatBuf
conf: Extract error-checking code from virDomainGraphicsDefParseXML
conf: Replace virDomainGraphicsDefParseXML(hardcoded) with
namesake(generated)
conf: Replace virDomainGraphicsDefFormat(hardcoded) with
virDomainGraphicsDefFormatBuf(generated)
meson.build | 5 +
po/POTFILES.in | 3 +
scripts/meson.build | 8 +
scripts/xmlgen/directive.py | 1115 ++++++++++++++++++++
scripts/xmlgen/go | 7 +
scripts/xmlgen/main.py | 439 ++++++++
scripts/xmlgen/utils.py | 121 +++
src/conf/domain_conf.c | 1650 +++++++++---------------------
src/conf/domain_conf.h | 179 ++--
src/conf/meson.build | 41 +
src/conf/network_conf.c | 467 ++-------
src/conf/network_conf.h | 54 +-
src/conf/virconftypes.h | 18 +
src/libvirt_private.syms | 9 +
src/meson.build | 6 +
src/qemu/qemu_command.c | 4 +
src/qemu/qemu_driver.c | 2 +
src/qemu/qemu_hotplug.c | 2 +
src/qemu/qemu_migration_cookie.c | 1 +
src/qemu/qemu_process.c | 5 +
src/qemu/qemu_validate.c | 2 +
src/util/virsocketaddr.c | 42 +
src/util/virsocketaddr.h | 26 +-
src/util/virstring.c | 57 ++
src/util/virstring.h | 9 +
src/util/virxml.c | 105 ++
src/util/virxml.h | 6 +
src/vmx/vmx.c | 1 +
tests/meson.build | 1 +
tools/meson.build | 2 +
30 files changed, 2738 insertions(+), 1649 deletions(-)
create mode 100644 scripts/xmlgen/directive.py
create mode 100755 scripts/xmlgen/go
create mode 100755 scripts/xmlgen/main.py
create mode 100644 scripts/xmlgen/utils.py
--
2.25.1
4 years, 3 months
[PATCH] spec: keep existing nwfilters uuid on update
by Nikolay Shirokovskiy
Now on every nwfilter config package update we overwrite existing filters
entirely. It is desired to bring new version of filters on update but we'd
better keep their uuids I guess.
Actually patch primarily address noise in logs on update. If both libvirtd and
firewalld are running and libvirt is using firewalld backend then on firewalld
restart we reload all nwfilters. So if node is updated and we have update for
both firewalld and libvirt then in the process of update first new nwfilters of
libvirt package are copied to /etc/libvirt/nwfilters then firewalld is
restarted and then libvirtd is restarted. In this process firewalld restart
cause log messages like [1]. The issue is libvirt brings nwfilters without
<uuid> in definition and on handling firewalld restart libvirt generates
missing uuid and then fail to update filter definition because it is already
present in filters list with different uuid.
[1] virNWFilterObjListAssignDef:337 : operation failed: filter 'no-ip-spoofing'
already exists with uuid c302edf9-8a48-40d8-a652-f70b2c563ad1
Signed-off-by: Nikolay Shirokovskiy <nshirokovskiy(a)virtuozzo.com>
---
libvirt.spec.in | 13 ++++++++++++-
1 file changed, 12 insertions(+), 1 deletion(-)
diff --git a/libvirt.spec.in b/libvirt.spec.in
index 2a4324b..6a31440 100644
--- a/libvirt.spec.in
+++ b/libvirt.spec.in
@@ -1438,7 +1438,18 @@ fi
rm -rf %{_localstatedir}/lib/rpm-state/libvirt || :
%post daemon-config-nwfilter
-cp %{_datadir}/libvirt/nwfilter/*.xml %{_sysconfdir}/libvirt/nwfilter/
+# keep existing filters uuid on update
+for dfile in %{_datadir}/libvirt/nwfilter/*.xml; do
+ sfile=%{_sysconfdir}/libvirt/nwfilter/`basename $dfile`
+ if [ -f "$sfile" ]; then
+ uuidstr=`sed -n '/<uuid>.*<\/uuid>/p' "$sfile"`
+ if [ ! -z "$uuidstr" ]; then
+ sed -e "s,<filter .*>,&\n$uuidstr," "$dfile" > "$sfile"
+ continue
+ fi
+ fi
+ cp "$dfile" "$sfile"
+done
# libvirt saves these files with mode 600
chmod 600 %{_sysconfdir}/libvirt/nwfilter/*.xml
# Make sure libvirt picks up the new nwfilter defininitons
--
1.8.3.1
4 years, 3 months
[libvirt][RFC PATCH] add a new 'default' option for attribute mode in numatune
by Luyao Zhong
Hi Libvirt experts,
I would like enhence the numatune snippet configuration. Given a example snippet:
<domain>
...
<numatune>
<memory mode="strict" nodeset="1-4,^3"/>
<memnode cellid="0" mode="strict" nodeset="1"/>
<memnode cellid="2" mode="preferred" nodeset="2"/>
</numatune>
...
</domain>
Currently, attribute mode is either 'interleave', 'strict', or 'preferred',
I propose to add a new 'default' option. I give the reason as following.
Presume we are using cgroups v1, Libvirt sets cpuset.mems for all vcpu threads
according to 'nodeset' in memory element. And translate the memnode element to
qemu config options (--object memory-backend-ram) for per numa cell, which
invoking mbind() system call at the end.[1]
But what if we want using default memory policy and request each guest numa cell
pinned to different host memory nodes? We can't use mbind via qemu config options,
because (I quoto here) "For MPOL_DEFAULT, the nodemask and maxnode arguments must
be specify the empty set of nodes." [2]
So my solution is introducing a new 'default' option for attribute mode. e.g.
<domain>
...
<numatune>
<memory mode="default" nodeset="1-2"/>
<memnode cellid="0" mode="default" nodeset="1"/>
<memnode cellid="1" mode="default" nodeset="2"/>
</numatune>
...
</domain>
If the mode is 'default', libvirt should avoid generating qemu command line
'--object memory-backend-ram', and invokes cgroups to set cpuset.mems for per guest numa
combining with numa topology config. Presume the numa topology is :
<cpu>
...
<numa>
<cell id='0' cpus='0-3' memory='512000' unit='KiB' />
<cell id='1' cpus='4-7' memory='512000' unit='KiB' />
</numa>
...
</cpu>
Then libvirt should set cpuset.mems to '1' for vcpus 0-3, and '2' for vcpus 4-7.
Is this reasonable and feasible? Welcome any comments.
Regards,
Luyao
[1]https://github.com/qemu/qemu/blob/f2a1cf9180f63e88bb38ff21c169da97c3f2b...
[2]https://man7.org/linux/man-pages/man2/mbind.2.html
--
2.25.1
4 years, 4 months
[PATCH v2 0/5] Hypervisor CPU Baseline Cleanups and Fixes
by Collin Walling
The following patches provide some TLC to the hypervisor CPU baseline
handler within the qemu_driver code.
#1 checks for the cpu-model-expansion capability before
executing the baseline handler since it is used for feature expansion.
#2 fixes a styling where a < 0 condition was missing from one of the
if (function()) lines for consistency's sake.
#3 will check if the cpu definition(s) are valid and contain a model
name.
#4 checks the cpu definition(s) model names against the model names
known by the hypervisor. This patch must come before #5.
#5 will allow the baseline command to be ran with a single cpu
definition, whereas before the command would simply fail with an
unhelpful error message. A CPU model expansion will be performed in
this case, which will produce the same result as if the model were
actually baselined.
Note: without patch #4, #5 can result in a segfault in the case where
a single CPU model is provided and the model is not recognized by the
hypervisor. This is because cpu model expansion will return 0 and the
result will be NULL.
Since the QMP response returns "GenericError" in the case of a missing
CPU model, or if the command is not supported (and perhaps for other
reasons I am unsure of -- the response does not explicitly detail that
the CPU model provided was erroneous), we cannot rely on this
response always meaning there was a missing CPU model.
So, to be safe-and-sure, the CPU model is checked against the list of
CPU models known to the hypervisor prior to baselining / expanding
(which were retrieved at some point previously during libvirt init).
Collin Walling (5):
qemu: check for model-expansion cap before baselining
qemu: fix one instance of rc check styling in baseline
qemu: report error if missing model name when baselining
qemu: check if cpu model is supported before baselining
qemu: fix error message when baselining with a single cpu
src/qemu/qemu_driver.c | 45 ++++++++++++++++++++++++++++++++++--------
1 file changed, 37 insertions(+), 8 deletions(-)
--
2.26.2
4 years, 4 months
[PATCH] news: Mention Cooperlake cpu model in v6.4.0
by Han Han
Signed-off-by: Han Han <hhan(a)redhat.com>
---
NEWS.rst | 2 ++
1 file changed, 2 insertions(+)
diff --git a/NEWS.rst b/NEWS.rst
index 2fef3f706c..5dac805390 100644
--- a/NEWS.rst
+++ b/NEWS.rst
@@ -463,6 +463,8 @@ v6.4.0 (2020-06-02)
already does in these cases. Users are encouraged to provide complete NUMA
topologies to avoid unexpected changes in the domain XML.
+ * Cooperlake x86 CPU model is added
+
* **Bug fixes**
* qemu: fixed regression in network device hotplug with new qemu versions
--
2.28.0
4 years, 4 months
Races / crashes in shutdown of libvirtd daemon
by Daniel P. Berrangé
We got a new BZ filed about a libvirtd crash in shutdown
https://bugzilla.redhat.com/show_bug.cgi?id=1828207
We can see from the stack trace that the "interface" driver is in
the middle of servicing an RPC call for virConnectListAllInterfaces()
Meanwhile the libvirtd daemon is doing virObjectUnref(dmn) on the
virNetDaemonPtr object.
The fact that it is doing this unref, means that it must have already
call virStateCleanup(), given the code sequence:
/* Run event loop. */
virNetDaemonRun(dmn);
ret = 0;
virHookCall(VIR_HOOK_DRIVER_DAEMON, "-", VIR_HOOK_DAEMON_OP_SHUTDOWN,
0, "shutdown", NULL, NULL);
cleanup:
/* Keep cleanup order in inverse order of startup */
virNetDaemonClose(dmn);
virNetlinkEventServiceStopAll();
if (driversInitialized) {
/* NB: Possible issue with timing window between driversInitialized
* setting if virNetlinkEventServerStart fails */
driversInitialized = false;
virStateCleanup();
}
virObjectUnref(adminProgram);
virObjectUnref(srvAdm);
virObjectUnref(qemuProgram);
virObjectUnref(lxcProgram);
virObjectUnref(remoteProgram);
virObjectUnref(srv);
virObjectUnref(dmn);
Unless I'm missing something non-obvious, this cleanup code path is
inherantly broken & racy. When virNetDaemonRun() returns the RPC
worker threads are all still active. They are all liable to still
be executing RPC calls, which means any of the drivers may be in
use. So calling virStateCleanup() is an inherantly dangerous
thing to do. There is the further complication that once we have
exitted the main loop we may prevent the RPC calls from ever
completing, as they may be waiting on an event to be dispatched.
I know we're had various patch proposals in the past to improve the
robustness of shutdown cleanup but I can't remember the outcome of the
reviews. Hopefully people involved in those threads can jump in here...
IMHO the key problem here is the virNetDeamonRun() method which just
looks at the "quit" flag and immediately returns if it is set.
This needs to be changed so that when it sees quit == true, it takes
the following actions
1. Call virNetDaemonClose() to drop all RPC clients and thus prevent
new RPC calls arriving
2. Flush any RPC calls which are queued but not yet assigned to a
worker thread
3. Tell worker threads to exit after finishing their current job
4. Wait for all worker threads to exit
5. Now virNetDaemonRun may return
At this point we can call virStateCleanup and the various other
things, as we know no drivers are still active in RPC calls.
Having said that, there could be background threads in the the
drivers which are doing work that uses the event loop thread.
So we probably need a virStateClose() method that we call from
virNetDaemonRun, *after* all worker threads are gone, which would
cleanup any background threads while the event loop is still
running.
The issue is that step 4 above ("Wait for all worker threads to exit")
may take too long, or indeed never complete. To deal with this, it
will need a timeout. In the remote_daemon.c cleanup code path, if
there are still worker threads present, then we need to skip all
cleanup and simply call _exit(0) to terminate the process with no
attempt at cleanup, since it would be unsafe to try anything else.
Regards,
Daniel
--
|: https://berrange.com -o- https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org -o- https://fstop138.berrange.com :|
|: https://entangle-photo.org -o- https://www.instagram.com/dberrange :|
4 years, 4 months
[libvirt PATCH v2 00/16] Add support for persistent mediated devices
by Jonathon Jongsma
This patch series follows the previously-merged series which added support for
transient mediated devices. This series expands mdev support to include
persistent device definitions. Again, it relies on mdevctl as the backend.
It follows the common libvirt pattern of APIs by adding the following new APIs
for node devices:
- virNodeDeviceDefineXML() - defines a persistent device
- virNodeDeviceUndefine() - undefines a persistent device
- virNodeDeviceCreate() - starts a previously-defined device
It also adds virsh commands mapping to these new APIs: nodedev-define,
nodedev-undefine, and nodedev-start.
The method of staying up-to-date with devices defined by mdevctl is currently=
a
little bit crude due to the fact that mdevctl does not emit any events when n=
ew
devices are added or removed. As a workaround, we create a file monitor for t=
he
mdevctl config directory and re-query mdevctl when we detect changes within
that directory. In the future, mdevctl may introduce a more elegant solution.
Changes in v2:
- rebase to latest git master
Jonathon Jongsma (16):
tests: remove extra trailing semicolon
nodedev: introduce concept of 'active' node devices
nodedev: Add ability to filter by active state
virsh: Add --active, --inactive, --all to nodedev-list
nodedev: add ability to list and parse defined mdevs
nodedev: add STOPPED/STARTED lifecycle events
nodedev: add mdevctl devices to node device list
nodedev: handle mdevs that disappear from mdevctl
nodedev: add an mdevctl thread
api: add virNodeDeviceDefineXML()
virsh: add nodedev-define command
api: add virNodeDeviceUndefine()
virsh: Factor out function to find node device
virsh: add nodedev-undefine command
api: add virNodeDeviceCreate()
virsh: add "nodedev-start" command
examples/c/misc/event-test.c | 4 +
include/libvirt/libvirt-nodedev.h | 19 +-
src/conf/node_device_conf.h | 9 +
src/conf/virnodedeviceobj.c | 24 +
src/conf/virnodedeviceobj.h | 7 +
src/driver-nodedev.h | 14 +
src/libvirt-nodedev.c | 115 ++++
src/libvirt_private.syms | 2 +
src/libvirt_public.syms | 6 +
src/node_device/node_device_driver.c | 525 +++++++++++++++++-
src/node_device/node_device_driver.h | 38 ++
src/node_device/node_device_udev.c | 275 ++++++++-
src/remote/remote_driver.c | 3 +
src/remote/remote_protocol.x | 40 +-
src/remote_protocol-structs | 16 +
src/rpc/gendispatch.pl | 1 +
...19_36ea_4111_8f0a_8c9a70e21366-define.argv | 1 +
...19_36ea_4111_8f0a_8c9a70e21366-define.json | 1 +
...39_495e_4243_ad9f_beb3f14c23d9-define.argv | 1 +
...39_495e_4243_ad9f_beb3f14c23d9-define.json | 1 +
...16_1ca8_49ac_b176_871d16c13076-define.argv | 1 +
...16_1ca8_49ac_b176_871d16c13076-define.json | 1 +
tests/nodedevmdevctldata/mdevctl-create.argv | 1 +
.../mdevctl-list-defined.argv | 1 +
.../mdevctl-list-multiple-parents.json | 59 ++
.../mdevctl-list-multiple-parents.out.xml | 39 ++
.../mdevctl-list-multiple.json | 59 ++
.../mdevctl-list-multiple.out.xml | 39 ++
.../mdevctl-list-single-noattr.json | 11 +
.../mdevctl-list-single-noattr.out.xml | 8 +
.../mdevctl-list-single.json | 31 ++
.../mdevctl-list-single.out.xml | 14 +
.../nodedevmdevctldata/mdevctl-undefine.argv | 1 +
tests/nodedevmdevctltest.c | 227 +++++++-
tools/virsh-nodedev.c | 281 ++++++++--
35 files changed, 1787 insertions(+), 88 deletions(-)
create mode 100644 tests/nodedevmdevctldata/mdev_d069d019_36ea_4111_8f0a_8c9=
a70e21366-define.argv
create mode 100644 tests/nodedevmdevctldata/mdev_d069d019_36ea_4111_8f0a_8c9=
a70e21366-define.json
create mode 100644 tests/nodedevmdevctldata/mdev_d2441d39_495e_4243_ad9f_beb=
3f14c23d9-define.argv
create mode 100644 tests/nodedevmdevctldata/mdev_d2441d39_495e_4243_ad9f_beb=
3f14c23d9-define.json
create mode 100644 tests/nodedevmdevctldata/mdev_fedc4916_1ca8_49ac_b176_871=
d16c13076-define.argv
create mode 100644 tests/nodedevmdevctldata/mdev_fedc4916_1ca8_49ac_b176_871=
d16c13076-define.json
create mode 100644 tests/nodedevmdevctldata/mdevctl-create.argv
create mode 100644 tests/nodedevmdevctldata/mdevctl-list-defined.argv
create mode 100644 tests/nodedevmdevctldata/mdevctl-list-multiple-parents.js=
on
create mode 100644 tests/nodedevmdevctldata/mdevctl-list-multiple-parents.ou=
t.xml
create mode 100644 tests/nodedevmdevctldata/mdevctl-list-multiple.json
create mode 100644 tests/nodedevmdevctldata/mdevctl-list-multiple.out.xml
create mode 100644 tests/nodedevmdevctldata/mdevctl-list-single-noattr.json
create mode 100644 tests/nodedevmdevctldata/mdevctl-list-single-noattr.out.x=
ml
create mode 100644 tests/nodedevmdevctldata/mdevctl-list-single.json
create mode 100644 tests/nodedevmdevctldata/mdevctl-list-single.out.xml
create mode 100644 tests/nodedevmdevctldata/mdevctl-undefine.argv
--=20
2.26.2
4 years, 4 months
Libvirt Open Source Contribution
by Barrett J Schonefeld
Hey libvirt team,
We (Ryan Gahagan, Dustan Helm, and Barrett Schonefeld) are computer science
students at the University of Texas at Austin. We are taking a course in
virtualization, and we’d like to contribute to the libvirt repository as
part of this course. Here are the issues we are most interested in:
https://gitlab.com/libvirt/libvirt/-/issues/11
https://gitlab.com/libvirt/libvirt/-/issues/16
Additionally, we would like to take a look at issue 4 (
https://gitlab.com/libvirt/libvirt/-/issues/4), the UDP slowdown for QEMU.
We expect issue 4 to be more time-intensive, and we would like to
communicate with you to ensure we’re solving the problem effectively.
Our course only runs until the end of the fall semester, so our time to
contribute to this project is somewhat limited. If you think any of the
issues we picked would be too difficult to accomplish during that time
frame, we would appreciate alternative suggestions. We really hope to
contribute to this project and help make improvements where we can.
Best regards,
Dustan Helm: dustan.helm(a)yahoo.com
Barrett Schonefeld: bschoney(a)utexas.edu
Ryan Gahagan: ryangahagan18(a)gmail.com
4 years, 4 months