Re: [libvirt] [Qemu-devel] Qemu migration with vhost-user-blk on top of local storage
by Stefan Hajnoczi
On Wed, Jan 09, 2019 at 06:23:42PM +0800, wuzhouhui wrote:
> Hi everyone,
>
> I'm working with qemu and a vhost target (e.g. spdk), and I am attempting to migrate a VM with
> 2 local storages. One local storage is a regular file, e.g. /tmp/c74.qcow2, and
> the other is a malloc bdev that spdk created. This malloc bdev is exported to the
> VM via vhost-user-blk. When I execute the following command:
>
> virsh migrate --live --persistent --unsafe --undefinesource --copy-storage-all \
> --p2p --auto-converge --verbose --desturi qemu+tcp://<uri>/system vm0
>
> The libvirt reports:
>
> qemu-2.12.1: error: internal error: unable to execute QEMU command \
> 'nbd-server-add': Cannot find device=drive-virtio-disk1 nor \
> node_name=drive-virtio-disk1
Please post your libvirt domain XML.
> Does it mean that qemu with spdk on top of local storage doesn't support migration?
>
> QEMU: 2.12.1
> SPDK: 18.10
vhost-user-blk bypasses the QEMU block layer, so NBD storage migration
at the QEMU level will not work for the vhost-user-blk disk.
Stefan
11 months, 3 weeks
[libvirt] [PATCH v3] openvswitch: Add new port VLAN mode "dot1q-tunnel"
by luzhipeng@uniudc.com
From: ZhiPeng Lu <luzhipeng(a)uniudc.com>
Signed-off-by: ZhiPeng Lu <luzhipeng(a)uniudc.com>
---
v1->v2:
1. Fix "make syntax-check" failure
v2->v3:
1. remove other_config when updating vlan
docs/formatnetwork.html.in | 17 +++++++++--------
docs/schemas/networkcommon.rng | 1 +
src/conf/netdev_vlan_conf.c | 2 +-
src/util/virnetdevopenvswitch.c | 7 +++++++
src/util/virnetdevvlan.h | 1 +
5 files changed, 19 insertions(+), 9 deletions(-)
diff --git a/docs/formatnetwork.html.in b/docs/formatnetwork.html.in
index 363a72b..3c1ae62 100644
--- a/docs/formatnetwork.html.in
+++ b/docs/formatnetwork.html.in
@@ -688,16 +688,17 @@
</p>
<p>
For network connections using Open vSwitch it is also possible
- to configure 'native-tagged' and 'native-untagged' VLAN modes
+ to configure 'native-tagged' and 'native-untagged' and 'dot1q-tunnel'
+ VLAN modes.
<span class="since">Since 1.1.0.</span> This is done with the
- optional <code>nativeMode</code> attribute on
- the <code><tag></code> subelement: <code>nativeMode</code>
- may be set to 'tagged' or 'untagged'. The <code>id</code>
- attribute of the <code><tag></code> subelement
- containing <code>nativeMode</code> sets which VLAN is considered
- to be the "native" VLAN for this interface, and
+ optional <code>nativeMode</code> attribute on the
+ <code><tag></code> subelement: <code>nativeMode</code>
+ may be set to 'tagged' or 'untagged' or 'dot1q-tunnel'.
+ The <code>id</code> attribute of the <code><tag></code>
+ subelement containing <code>nativeMode</code> sets which VLAN is
+ considered to be the "native" VLAN for this interface, and
the <code>nativeMode</code> attribute determines whether or not
- traffic for that VLAN will be tagged.
+ traffic for that VLAN will be tagged or QinQ.
</p>
<p>
<code><vlan></code> elements can also be specified in
diff --git a/docs/schemas/networkcommon.rng b/docs/schemas/networkcommon.rng
index 2699555..11c48ff 100644
--- a/docs/schemas/networkcommon.rng
+++ b/docs/schemas/networkcommon.rng
@@ -223,6 +223,7 @@
<choice>
<value>tagged</value>
<value>untagged</value>
+ <value>dot1q-tunnel</value>
</choice>
</attribute>
</optional>
diff --git a/src/conf/netdev_vlan_conf.c b/src/conf/netdev_vlan_conf.c
index dff49c6..79710d9 100644
--- a/src/conf/netdev_vlan_conf.c
+++ b/src/conf/netdev_vlan_conf.c
@@ -29,7 +29,7 @@
#define VIR_FROM_THIS VIR_FROM_NONE
VIR_ENUM_IMPL(virNativeVlanMode, VIR_NATIVE_VLAN_MODE_LAST,
- "default", "tagged", "untagged")
+ "default", "tagged", "untagged", "dot1q-tunnel")
int
virNetDevVlanParse(xmlNodePtr node, xmlXPathContextPtr ctxt, virNetDevVlanPtr def)
diff --git a/src/util/virnetdevopenvswitch.c b/src/util/virnetdevopenvswitch.c
index 8fe06fd..9fec30b 100644
--- a/src/util/virnetdevopenvswitch.c
+++ b/src/util/virnetdevopenvswitch.c
@@ -91,6 +91,11 @@ virNetDevOpenvswitchConstructVlans(virCommandPtr cmd, virNetDevVlanPtr virtVlan)
virCommandAddArg(cmd, "vlan_mode=native-untagged");
virCommandAddArgFormat(cmd, "tag=%d", virtVlan->nativeTag);
break;
+ case VIR_NATIVE_VLAN_MODE_DOT1Q_TUNNEL:
+ virCommandAddArg(cmd, "vlan_mode=dot1q-tunnel");
+ virCommandAddArg(cmd, "other_config:qinq-ethtype=802.1q");
+ virCommandAddArgFormat(cmd, "tag=%d", virtVlan->nativeTag);
+ break;
case VIR_NATIVE_VLAN_MODE_DEFAULT:
default:
break;
@@ -504,6 +509,8 @@ int virNetDevOpenvswitchUpdateVlan(const char *ifname,
"--", "--if-exists", "clear", "Port", ifname, "tag",
"--", "--if-exists", "clear", "Port", ifname, "trunk",
"--", "--if-exists", "clear", "Port", ifname, "vlan_mode",
+ "--", "--if-exists", "remove", "Port", ifname, "other_config",
+ "qinq-ethtype", NULL,
"--", "--if-exists", "set", "Port", ifname, NULL);
if (virNetDevOpenvswitchConstructVlans(cmd, virtVlan) < 0)
diff --git a/src/util/virnetdevvlan.h b/src/util/virnetdevvlan.h
index be85f59..0667f9d 100644
--- a/src/util/virnetdevvlan.h
+++ b/src/util/virnetdevvlan.h
@@ -29,6 +29,7 @@ typedef enum {
VIR_NATIVE_VLAN_MODE_DEFAULT = 0,
VIR_NATIVE_VLAN_MODE_TAGGED,
VIR_NATIVE_VLAN_MODE_UNTAGGED,
+ VIR_NATIVE_VLAN_MODE_DOT1Q_TUNNEL,
VIR_NATIVE_VLAN_MODE_LAST
} virNativeVlanMode;
--
1.8.3.1
11 months, 3 weeks
[libvirt] [PATCH] Fix compile error for stable 1.2.9
by Yang hongyang
Seems a backport miss. An extra member is passed to struct
virLXCBasicMountInfo.
Signed-off-by: Yang hongyang <hongyang.yang(a)easystack.cn>
---
src/lxc/lxc_container.c | 2 +-
1 files changed, 1 insertions(+), 1 deletions(-)
diff --git a/src/lxc/lxc_container.c b/src/lxc/lxc_container.c
index 28dabec..1c65fa9 100644
--- a/src/lxc/lxc_container.c
+++ b/src/lxc/lxc_container.c
@@ -760,7 +760,7 @@ typedef struct {
static const virLXCBasicMountInfo lxcBasicMounts[] = {
{ "proc", "/proc", "proc", MS_NOSUID|MS_NOEXEC|MS_NODEV, false, false },
- { "/proc/sys", "/proc/sys", NULL, MS_BIND|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_RDONLY, false, false, false },
+ { "/proc/sys", "/proc/sys", NULL, MS_BIND|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_RDONLY, false, false },
{ "sysfs", "/sys", "sysfs", MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_RDONLY, false, false },
{ "securityfs", "/sys/kernel/security", "securityfs", MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_RDONLY, true, true },
#if WITH_SELINUX
--
1.7.1
11 months, 3 weeks
[libvirt] Supporting vhost-net and macvtap in libvirt for QEMU
by Anthony Liguori
Disclaimer: I am neither an SR-IOV nor a vhost-net expert, but I've CC'd
people that are who can throw tomatoes at me for getting bits wrong :-)
I wanted to start a discussion about supporting vhost-net in libvirt.
vhost-net has not yet been merged into qemu but I expect it will be soon
so it's a good time to start this discussion.
There are two modes worth supporting for vhost-net in libvirt. The
first mode is where vhost-net backs to a tun/tap device. This
behaves in very much the same way that -net tap behaves in qemu today.
Basically, the difference is that the virtio backend is in the kernel
instead of in qemu so there should be some performance improvement.
Currently, libvirt invokes qemu with -net tap,fd=X where X is an already
open fd to a tun/tap device. I suspect that after we merge vhost-net,
libvirt could support vhost-net in this mode by just doing -net
vhost,fd=X. I think the only real question for libvirt is whether to
provide a user visible switch to use vhost or to just always use vhost
when it's available and it makes sense. Personally, I think the latter
makes sense.
The more interesting invocation of vhost-net though is one where the
vhost-net device backs directly to a physical network card. In this
mode, vhost should get considerably better performance than the current
implementation. I don't know the syntax yet, but I think it's
reasonable to assume that it will look something like -net
tap,dev=eth0. The effect will be that eth0 is dedicated to the guest.
On most modern systems, there is a small number of network devices so
this model is not all that useful except when dealing with SR-IOV
adapters. In that case, each physical device can be exposed as many
virtual devices (VFs). There are a few restrictions here though. The
biggest is that currently, you can only change the number of VFs by
reloading a kernel module so it's really a parameter that must be set at
startup time.
I think there are a few ways libvirt could support vhost-net in this
second mode. The simplest would be to introduce a new tag similar to
<source network='br0'>. In fact, if you probed the device type for the
network parameter, you could probably do something like <source
network='eth0'> and have it Just Work.
Another model would be to have libvirt see an SR-IOV adapter as a
network pool, where it would handle all of the VF management. Considering
how inflexible SR-IOV is today, I'm not sure whether this is the best model.
Has anyone put any more thought into this problem or how this should be
modeled in libvirt? Michael, could you share your current thinking for
-net syntax?
--
Regards,
Anthony Liguori
11 months, 3 weeks
[PATCH] Qemu: migration: Not bind RAM info with active migration status
by Keqian Zhu
QEMU has supported returning incoming migration info since its commit
65ace0604551 (migration: add postcopy total blocktime into query-migrate),
and that info may contain an active status without any RAM info. Drop the
check in libvirt that binds an active status to the presence of RAM info.
Signed-off-by: Keqian Zhu <zhukeqian1(a)huawei.com>
---
src/qemu/qemu_monitor_json.c | 88 +++++++++++++++++-------------------
1 file changed, 42 insertions(+), 46 deletions(-)
diff --git a/src/qemu/qemu_monitor_json.c b/src/qemu/qemu_monitor_json.c
index d808c4b55b..ba8e340742 100644
--- a/src/qemu/qemu_monitor_json.c
+++ b/src/qemu/qemu_monitor_json.c
@@ -3547,56 +3547,52 @@ qemuMonitorJSONGetMigrationStatsReply(virJSONValuePtr reply,
case QEMU_MONITOR_MIGRATION_STATUS_PRE_SWITCHOVER:
case QEMU_MONITOR_MIGRATION_STATUS_DEVICE:
ram = virJSONValueObjectGetObject(ret, "ram");
- if (!ram) {
- virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
- _("migration was active, but no RAM info was set"));
- return -1;
- }
+ if (ram) {
+ if (virJSONValueObjectGetNumberUlong(ram, "transferred",
+ &stats->ram_transferred) < 0) {
+ virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
+ _("migration was active, but RAM 'transferred' "
+ "data was missing"));
+ return -1;
+ }
+ if (virJSONValueObjectGetNumberUlong(ram, "remaining",
+ &stats->ram_remaining) < 0) {
+ virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
+ _("migration was active, but RAM 'remaining' "
+ "data was missing"));
+ return -1;
+ }
+ if (virJSONValueObjectGetNumberUlong(ram, "total",
+ &stats->ram_total) < 0) {
+ virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
+ _("migration was active, but RAM 'total' "
+ "data was missing"));
+ return -1;
+ }
- if (virJSONValueObjectGetNumberUlong(ram, "transferred",
- &stats->ram_transferred) < 0) {
- virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
- _("migration was active, but RAM 'transferred' "
- "data was missing"));
- return -1;
- }
- if (virJSONValueObjectGetNumberUlong(ram, "remaining",
- &stats->ram_remaining) < 0) {
- virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
- _("migration was active, but RAM 'remaining' "
- "data was missing"));
- return -1;
- }
- if (virJSONValueObjectGetNumberUlong(ram, "total",
- &stats->ram_total) < 0) {
- virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
- _("migration was active, but RAM 'total' "
- "data was missing"));
- return -1;
- }
+ if (virJSONValueObjectGetNumberDouble(ram, "mbps", &mbps) == 0 &&
+ mbps > 0) {
+ /* mpbs from QEMU reports Mbits/s (M as in 10^6 not Mi as 2^20) */
+ stats->ram_bps = mbps * (1000 * 1000 / 8);
+ }
- if (virJSONValueObjectGetNumberDouble(ram, "mbps", &mbps) == 0 &&
- mbps > 0) {
- /* mpbs from QEMU reports Mbits/s (M as in 10^6 not Mi as 2^20) */
- stats->ram_bps = mbps * (1000 * 1000 / 8);
+ if (virJSONValueObjectGetNumberUlong(ram, "duplicate",
+ &stats->ram_duplicate) == 0)
+ stats->ram_duplicate_set = true;
+ ignore_value(virJSONValueObjectGetNumberUlong(ram, "normal",
+ &stats->ram_normal));
+ ignore_value(virJSONValueObjectGetNumberUlong(ram, "normal-bytes",
+ &stats->ram_normal_bytes));
+ ignore_value(virJSONValueObjectGetNumberUlong(ram, "dirty-pages-rate",
+ &stats->ram_dirty_rate));
+ ignore_value(virJSONValueObjectGetNumberUlong(ram, "page-size",
+ &stats->ram_page_size));
+ ignore_value(virJSONValueObjectGetNumberUlong(ram, "dirty-sync-count",
+ &stats->ram_iteration));
+ ignore_value(virJSONValueObjectGetNumberUlong(ram, "postcopy-requests",
+ &stats->ram_postcopy_reqs));
}
- if (virJSONValueObjectGetNumberUlong(ram, "duplicate",
- &stats->ram_duplicate) == 0)
- stats->ram_duplicate_set = true;
- ignore_value(virJSONValueObjectGetNumberUlong(ram, "normal",
- &stats->ram_normal));
- ignore_value(virJSONValueObjectGetNumberUlong(ram, "normal-bytes",
- &stats->ram_normal_bytes));
- ignore_value(virJSONValueObjectGetNumberUlong(ram, "dirty-pages-rate",
- &stats->ram_dirty_rate));
- ignore_value(virJSONValueObjectGetNumberUlong(ram, "page-size",
- &stats->ram_page_size));
- ignore_value(virJSONValueObjectGetNumberUlong(ram, "dirty-sync-count",
- &stats->ram_iteration));
- ignore_value(virJSONValueObjectGetNumberUlong(ram, "postcopy-requests",
- &stats->ram_postcopy_reqs));
-
disk = virJSONValueObjectGetObject(ret, "disk");
if (disk) {
rc = virJSONValueObjectGetNumberUlong(disk, "transferred",
--
2.19.1
3 years, 9 months
Release of libvirt-6.5.0
by Daniel Veillard
Half a day late, but I pushed the 6.5.0 release out, it is as usual
available as a signed tarball and source rpms from the server:
https://libvirt.org/sources/
I also tagged and pushed the 6.5.0 python bindings that one can find at
https://libvirt.org/sources/python/
This release includes a number of new features and some improvements,
as well as a fix for a crash which had made its way into 6.4.0.
It will also be my last release of libvirt after close to 15 years,
so expect new releases to be signed by Jiri Denemark from now on.
* New Features:
- Allow firmware blobs configuration
QEMU offers a way to tweak how firmware configures itself
and/or provide new configuration blobs. New ``<sysinfo/>``
type is introduced that will hold these new blobs.
It's possible to either specify new value as a string or
provide a filename which contents then serve as the value.
- nodedev: Add ability to create mediated devices
Mediated devices can now be created with ``virNodeDeviceCreateXML()``. This
functionality requires the ``mdevctl`` utility to be installed. The XML
schema for node devices was expanded to support attributes for mediated
devices.
- QEMU: add TPM Proxy device support
libvirt can now create guests using a new device type called
"TPM Proxy". The TPM Proxy connects to a TPM Resource Manager
present in the host, enabling the guest to run in secure virtual
machine mode with the help of an Ultravisor. Adding a TPM Proxy to
a pSeries guest brings no security benefits unless the guest is
running on a PPC64 host that has Ultravisor and TPM Resource Manager
support. Only one TPM Proxy is allowed per guest. A guest using
a TPM Proxy device can instantiate another TPM device at the same
time. This device is supported only for pSeries guests via the new
'spapr-tpm-proxy' model of the TPM 'passthrough' backend.
- virhook: Support hooks placed in several files
All scripts in the directory /etc/libvirt/hooks/<driver>.d are now run in
alphabetical order. A hook script in the old location will be executed
first for backward compatibility.
- qemu: Add support for migratable host-passthrough CPU
QEMU 2.12 made it possible for guests to use a migration-friendly
version of the host-passthrough CPU. This feature is now exposed by
libvirt.
* Improvements:
- network: Support NAT with IPv6
It's now possible to use ``<nat ipv6="yes"/>`` in a libvirt network.
- qemu: Auto-fill NUMA information for incomplete topologies
If the NUMA topology is not fully described in the guest XML, libvirt
will complete it by putting all unspecified CPUs in the first NUMA node.
This is only done if the QEMU binary itself supports disjointed CPU
ranges for NUMA nodes.
- qemu: Assign hostdev-backed interfaces to PCIe slots
All SR-IOV capable devices are PCIe, so when their VFs are assigned to
guests they should end up in PCIe slots rather than conventional PCI ones.
* Bug fixes:
- qemu: fixed crash in ``qemuDomainBlockCommit``
This release fixes a regression which was introduced in libvirt v6.4.0
where libvirtd always crashes when a block commit of a disk is requested.
- qemu: fixed zPCI address auto generation on s390
Removes the correlation between the zPCI address attributes uid and fid.
Fixes the validation and autogeneration of zPCI address attributes.
- qemu: Skip pre-creation of NVMe disks during migration
libvirt has no way to create NVMe devices on the target host, so it now
just makes sure they exist and lets the migration proceed in that case.
Thanks everybody for the help on putting this release out, and
the gazillion ones before :-)
Enjoy and stay safe !
Daniel
--
Daniel Veillard | Red Hat Developers Tools http://developer.redhat.com/
veillard(a)redhat.com | libxml Gnome XML XSLT toolkit http://xmlsoft.org/
http://veillard.com/ | virtualization library http://libvirt.org/
3 years, 9 months
[PATCH] virt-aa-helper: disallow graphics socket read permissions
by Simon Arlott
The VM does not need read permission for its own sockets to create(),
bind(), accept() connections or to recv(), send(), etc. on connections.
This was fixed in ab9569e5460d1e4737fe8b625c67687dc2204665
(virt-aa-helper: disallow VNC socket read permissions),
but then b6465e1aa49397367a9cd0f27110b9c2280a7385
(graphics: introduce new listen type 'socket')
and acc83afe333bfadd3f7f79091d38ca3d7da1eeb2
(acc83afe333bfadd3f7f79091d38ca3d7da1eeb2) reverted it.
Unless the read permission is omitted, VMs can connect to each other's
VNC/graphics sockets.
Signed-off-by: Simon Arlott <libvirt(a)octiron.net>
---
src/security/virt-aa-helper.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/security/virt-aa-helper.c b/src/security/virt-aa-helper.c
index 6e6dd1b1db..fddbdafc41 100644
--- a/src/security/virt-aa-helper.c
+++ b/src/security/virt-aa-helper.c
@@ -1053,7 +1053,7 @@ get_files(vahControl * ctl)
if (listenObj.type == VIR_DOMAIN_GRAPHICS_LISTEN_TYPE_SOCKET &&
listenObj.socket &&
- vah_add_file(&buf, listenObj.socket, "rw"))
+ vah_add_file(&buf, listenObj.socket, "w"))
goto cleanup;
}
}
--
2.17.1
--
Simon Arlott
3 years, 9 months
[PATCH] util: Add phys_port_name support on virPCIGetNetName
by Dmytro Linkin
Current virPCIGetNetName() logic is to get the net device name by checking
its phys_port_id, if the caller provides it, or by its index (i.e., by its
position in the sysfs net directory). This approach worked fine up until
Linux kernel version 5.8, where the NVIDIA Mellanox driver implemented
linking of VFs' representors to the PCI device in switchdev mode. This means
that the device's sysfs net directory will hold multiple net devices. Ex.:
$ ls '/sys/bus/pci/devices/0000:82:00.0/net'
ens1f0 eth0 eth1
Most switch devices support phys_port_name instead of phys_port_id, so
virPCIGetNetName() will try to get the PF name by its index - 0. The
problem here is that the PF netdev entry may not be the first.
To fix that, for switch devices, we introduce new logic to select the
PF uplink netdev according to the content of phys_port_name. Extend
virPCIGetNetName() with a physPortNameRegex variable to get the proper device
by its phys_port_name scheme, e.g., "p[0-9]+$" to get the PF,
"pf[0-9]+vf[0-9]+$" to get a VF or "p1$" to get an exact net device. So now
the virPCIGetNetName() logic works in the following sequence:
- filter by phys_port_id, if it's provided,
or
- filter by phys_port_name, if its regex is provided,
or
- get the net device by its index (position) in the sysfs net directory.
Also, make getting the content of iface sysfs files more generic.
Signed-off-by: Dmytro Linkin <dlinkin(a)nvidia.com>
Reviewed-by: Adrian Chiris <adrianc(a)nvidia.com>
---
src/hypervisor/virhostdev.c | 2 +-
src/util/virnetdev.c | 74 ++++++++++++++++++++++++++++++++++++---------
src/util/virnetdev.h | 4 +++
src/util/virpci.c | 63 ++++++++++++++++++++++++++++++++++++--
src/util/virpci.h | 6 ++++
5 files changed, 130 insertions(+), 19 deletions(-)
diff --git a/src/hypervisor/virhostdev.c b/src/hypervisor/virhostdev.c
index 69102b8..1f5c347 100644
--- a/src/hypervisor/virhostdev.c
+++ b/src/hypervisor/virhostdev.c
@@ -333,7 +333,7 @@ virHostdevNetDevice(virDomainHostdevDefPtr hostdev,
* type='hostdev'>, and it is only those devices that should
* end up calling this function.
*/
- if (virPCIGetNetName(sysfs_path, 0, NULL, linkdev) < 0)
+ if (virPCIGetNetName(sysfs_path, 0, NULL, NULL, linkdev) < 0)
return -1;
if (!(*linkdev)) {
diff --git a/src/util/virnetdev.c b/src/util/virnetdev.c
index b42fa86..99e3b35 100644
--- a/src/util/virnetdev.c
+++ b/src/util/virnetdev.c
@@ -1112,6 +1112,29 @@ virNetDevGetPCIDevice(const char *devName)
}
+/* A wrapper to get content of file from ifname SYSFS_NET_DIR
+ */
+static int
+virNetDevGetSysfsFileValue(const char *ifname,
+ const char *fileName,
+ char **sysfsFileData)
+{
+ g_autofree char *sysfsFile = NULL;
+
+ *sysfsFileData = NULL;
+
+ if (virNetDevSysfsFile(&sysfsFile, ifname, fileName) < 0)
+ return -1;
+
+ /* a failure to read just means the driver doesn't support
+ * <fileName>, so set success now and ignore the return from
+ * virFileReadAllQuiet().
+ */
+
+ ignore_value(virFileReadAllQuiet(sysfsFile, 1024, sysfsFileData));
+ return 0;
+}
+
/**
* virNetDevGetPhysPortID:
*
@@ -1130,20 +1153,29 @@ int
virNetDevGetPhysPortID(const char *ifname,
char **physPortID)
{
- g_autofree char *physPortIDFile = NULL;
-
- *physPortID = NULL;
-
- if (virNetDevSysfsFile(&physPortIDFile, ifname, "phys_port_id") < 0)
- return -1;
+ return virNetDevGetSysfsFileValue(ifname, "phys_port_id", physPortID);
+}
- /* a failure to read just means the driver doesn't support
- * phys_port_id, so set success now and ignore the return from
- * virFileReadAllQuiet().
- */
- ignore_value(virFileReadAllQuiet(physPortIDFile, 1024, physPortID));
- return 0;
+/**
+ * virNetDevGetPhysPortName:
+ *
+ * @ifname: name of a netdev
+ *
+ * @physPortName: pointer to char* that will receive @ifname's
+ * phys_port_name from sysfs (null terminated
+ * string). Could be NULL if @ifname's net driver doesn't
+ * support phys_port_name (most netdev drivers
+ * don't). Caller is responsible for freeing the string
+ * when finished.
+ *
+ * Returns 0 on success or -1 on failure.
+ */
+int
+virNetDevGetPhysPortName(const char *ifname,
+ char **physPortName)
+{
+ return virNetDevGetSysfsFileValue(ifname, "phys_port_name", physPortName);
}
@@ -1200,7 +1232,7 @@ virNetDevGetVirtualFunctions(const char *pfname,
}
if (virPCIGetNetName(pci_sysfs_device_link, 0,
- pfPhysPortID, &((*vfname)[i])) < 0) {
+ pfPhysPortID, NULL, &((*vfname)[i])) < 0) {
goto cleanup;
}
@@ -1295,7 +1327,8 @@ virNetDevGetPhysicalFunction(const char *ifname, char **pfname)
return -1;
if (virPCIGetNetName(physfn_sysfs_path, 0,
- vfPhysPortID, pfname) < 0) {
+ vfPhysPortID,
+ VIR_PF_PHYS_PORT_NAME_REGEX, pfname) < 0) {
return -1;
}
@@ -1358,7 +1391,7 @@ virNetDevPFGetVF(const char *pfname, int vf, char **vfname)
* isn't bound to a netdev driver, it won't have a netdev name,
* and vfname will be NULL).
*/
- return virPCIGetNetName(virtfnSysfsPath, 0, pfPhysPortID, vfname);
+ return virPCIGetNetName(virtfnSysfsPath, 0, pfPhysPortID, NULL, vfname);
}
@@ -1403,6 +1436,17 @@ virNetDevGetPhysPortID(const char *ifname G_GNUC_UNUSED,
}
int
+virNetDevGetPhysPortName(const char *ifname G_GNUC_UNUSED,
+ char **physPortName)
+{
+ /* this actually should never be called, and is just here to
+ * satisfy the linker.
+ */
+ *physPortName = NULL;
+ return 0;
+}
+
+int
virNetDevGetVirtualFunctions(const char *pfname G_GNUC_UNUSED,
char ***vfname G_GNUC_UNUSED,
virPCIDeviceAddressPtr **virt_fns G_GNUC_UNUSED,
diff --git a/src/util/virnetdev.h b/src/util/virnetdev.h
index 55e3948..712421d 100644
--- a/src/util/virnetdev.h
+++ b/src/util/virnetdev.h
@@ -229,6 +229,10 @@ int virNetDevGetPhysPortID(const char *ifname,
char **physPortID)
ATTRIBUTE_NONNULL(1) ATTRIBUTE_NONNULL(2)
G_GNUC_WARN_UNUSED_RESULT;
+int virNetDevGetPhysPortName(const char *ifname,
+ char **physPortName)
+ ATTRIBUTE_NONNULL(1) ATTRIBUTE_NONNULL(2)
+ G_GNUC_WARN_UNUSED_RESULT;
int virNetDevGetVirtualFunctions(const char *pfname,
char ***vfname,
diff --git a/src/util/virpci.c b/src/util/virpci.c
index 47c671d..18b3f66 100644
--- a/src/util/virpci.c
+++ b/src/util/virpci.c
@@ -2409,8 +2409,10 @@ virPCIDeviceAddressGetSysfsFile(virPCIDeviceAddressPtr addr,
* virPCIGetNetName:
* @device_link_sysfs_path: sysfs path to the PCI device
* @idx: used to choose which netdev when there are several
- * (ignored if physPortID is set)
+ * (ignored if physPortID or physPortNameRegex is set)
* @physPortID: match this string in the netdev's phys_port_id
+ * (or NULL to ignore and use phys_port_name or idx instead)
+ * @physPortNameRegex: match this regex with netdev's phys_port_name
* (or NULL to ignore and use idx instead)
* @netname: used to return the name of the netdev
* (set to NULL (but returns success) if there is no netdev)
@@ -2421,11 +2423,13 @@ int
virPCIGetNetName(const char *device_link_sysfs_path,
size_t idx,
char *physPortID,
+ char *physPortNameRegex,
char **netname)
{
g_autofree char *pcidev_sysfs_net_path = NULL;
g_autofree char *firstEntryName = NULL;
g_autofree char *thisPhysPortID = NULL;
+ g_autofree char *thisPhysPortName = NULL;
int ret = -1;
DIR *dir = NULL;
struct dirent *entry = NULL;
@@ -2466,6 +2470,41 @@ virPCIGetNetName(const char *device_link_sysfs_path,
continue;
}
+ } else if (physPortNameRegex) {
+ /* Most switch devices use phys_port_name instead of
+ * phys_port_id.
+ * NOTE: VFs' representors net devices can be linked to PF's PCI
+ * device, which mean that there'll be multiple net devices
+ * instances and to get a proper net device need to match on
+ * specific regex.
+ * To get PF netdev, for ex., used following regex:
+ * "(p[0-9]+$)|(p[0-9]+s[0-9]+$)"
+ * or to get exact VF's netdev next regex is used:
+ * "pf0vf1$"
+ */
+ if (virNetDevGetPhysPortName(entry->d_name, &thisPhysPortName) < 0)
+ goto cleanup;
+
+ if (thisPhysPortName) {
+ /* if this one doesn't match, keep looking */
+ if (!virStringMatch(thisPhysPortName, physPortNameRegex)) {
+ VIR_FREE(thisPhysPortName);
+ /* Save the first entry we find to use as a failsafe
+ * in case we fail to match on regex.
+ */
+ if (!firstEntryName)
+ firstEntryName = g_strdup(entry->d_name);
+
+ continue;
+ }
+ } else {
+ /* Save the first entry we find to use as a failsafe in case
+ * phys_port_name is not supported.
+ */
+ if (!firstEntryName)
+ firstEntryName = g_strdup(entry->d_name);
+ continue;
+ }
} else {
if (i++ < idx)
continue;
@@ -2494,6 +2533,22 @@ virPCIGetNetName(const char *device_link_sysfs_path,
"phys_port_id '%s' under PCI device at %s"),
physPortID, device_link_sysfs_path);
}
+ } else if (physPortNameRegex) {
+ if (firstEntryName) {
+ /* We didn't match the provided phys_port_name regex, probably
+ * because kernel or NIC driver doesn't support it, so just
+ * return first netname we found.
+ */
+ *netname = firstEntryName;
+ firstEntryName = NULL;
+ ret = 0;
+ } else {
+ virReportError(VIR_ERR_INTERNAL_ERROR,
+ _("Could not find network device with "
+ "phys_port_name matching regex '%s' "
+ "under PCI device at %s"),
+ physPortNameRegex, device_link_sysfs_path);
+ }
} else {
ret = 0; /* no netdev at the given index is *not* an error */
}
@@ -2539,7 +2594,7 @@ virPCIGetVirtualFunctionInfo(const char *vf_sysfs_device_path,
* correct.
*/
if (pfNetDevIdx == -1) {
- if (virPCIGetNetName(vf_sysfs_device_path, 0, NULL, &vfname) < 0)
+ if (virPCIGetNetName(vf_sysfs_device_path, 0, NULL, NULL, &vfname) < 0)
goto cleanup;
if (vfname) {
@@ -2550,7 +2605,8 @@ virPCIGetVirtualFunctionInfo(const char *vf_sysfs_device_path,
}
if (virPCIGetNetName(pf_sysfs_device_path,
- pfNetDevIdx, vfPhysPortID, pfname) < 0) {
+ pfNetDevIdx, vfPhysPortID,
+ VIR_PF_PHYS_PORT_NAME_REGEX, pfname) < 0) {
goto cleanup;
}
@@ -2688,6 +2744,7 @@ int
virPCIGetNetName(const char *device_link_sysfs_path G_GNUC_UNUSED,
size_t idx G_GNUC_UNUSED,
char *physPortID G_GNUC_UNUSED,
+ char *physPortNameScheme G_GNUC_UNUSED,
char **netname G_GNUC_UNUSED)
{
virReportError(VIR_ERR_INTERNAL_ERROR, "%s", _(unsupported));
diff --git a/src/util/virpci.h b/src/util/virpci.h
index b3322ba..6ea0873 100644
--- a/src/util/virpci.h
+++ b/src/util/virpci.h
@@ -55,6 +55,11 @@ struct _virZPCIDeviceAddress {
#define VIR_PCI_DEVICE_ADDRESS_FMT "%04x:%02x:%02x.%d"
+/* Represents format of PF's phys_port_name in switchdev mode:
+ * 'p%u' or 'p%us%u'. New line checked since value is readed from sysfs file.
+ */
+# define VIR_PF_PHYS_PORT_NAME_REGEX ((char *)"(p[0-9]+$)|(p[0-9]+s[0-9]+$)")
+
struct _virPCIDeviceAddress {
unsigned int domain;
unsigned int bus;
@@ -232,6 +237,7 @@ int virPCIDeviceAddressGetSysfsFile(virPCIDeviceAddressPtr addr,
int virPCIGetNetName(const char *device_link_sysfs_path,
size_t idx,
char *physPortID,
+ char *physPortNameRegex,
char **netname);
int virPCIGetSysfsFile(char *virPCIDeviceName,
--
1.8.3.1
3 years, 10 months