[RFC PATCH 00/10] VirtioNet RSS support

This patch series adds RSS property support for virtio-net-pci. Virtio RSS works effectively with TAP devices; it requires additional vectors for VirtioNet, queues for the TAP device, and vCPU cores. Example of device configuration: ``` <interface type="network"> <mac address="52:54:00:c4:90:25"/> <source network="default"/> <model type="virtio"/> <driver name="qemu" queues="9" rss="on" rss_hash_report="off"/> <address type="pci" domain="0x0000" bus="0x01" slot="0x00" function="0x0"/> </interface> ``` Capability "rss" enables RSS; "rss_hash_report" enables hashes in the vheader. Qemu uses an eBPF program as the RSS driver. A helper is used to load the RSS eBPF program. The path to the helper is provided by Qemu through the "query-helper-paths" QMP command. The helper "qemu-ebpf-rss-helper" is built with Qemu and may differ from build to build, so Qemu is required to provide the proper helper path. Libvirt would call the helper and receive the program and map fds through a unix socket. The fds would be passed to Qemu in the "ebpf_rss_fds" property, either by passing them to the child process or through a unix socket. If libvirt fails to call the helper or Qemu doesn't provide the path, Qemu would be launched without the "ebpf_rss_fds" property. Without the "ebpf_rss_fds" property, Qemu would try to load the eBPF program by itself - usually, this requires additional system permissions. Qemu may use "in-qemu" RSS as a fallback option, which does not require system permissions, but doesn't work with vhost TAP. Qemu patches: https://lists.nongnu.org/archive/html/qemu-devel/2021-07/msg03535.html Andrew Melnychenko (10): domain_conf: Added configs for RSS and Hash report. qemu_capabilities: Added capabilites for qemu's "rss" and "hash". qemu_command: Added "rss" and "hash" properties. virsocket: Added receive for multiple fds. qemu_capabilities: Added capability for qemu's "ebpf_rss_fds". qemu_capabilities: Added capability for ebpf helper path. qemu_interface: Added ebpf helper call. 
qemu_command: Added ebpf RSS helper call for NIC creation. qemu_hotplug: Added helper call for hotplug NIC. docs: Added descriptions for "rss" and "rss_hash_report" configurations. docs/formatdomain.rst | 16 +++++++ src/conf/domain_conf.c | 31 +++++++++++++- src/conf/domain_conf.h | 2 + src/libvirt_private.syms | 1 + src/qemu/qemu_capabilities.c | 48 +++++++++++++++++++++ src/qemu/qemu_capabilities.h | 5 +++ src/qemu/qemu_command.c | 46 +++++++++++++++++++- src/qemu/qemu_command.h | 2 + src/qemu/qemu_hotplug.c | 30 ++++++++++++- src/qemu/qemu_interface.c | 54 +++++++++++++++++++++++ src/qemu/qemu_interface.h | 2 + src/qemu/qemu_monitor.c | 9 ++++ src/qemu/qemu_monitor.h | 3 ++ src/qemu/qemu_monitor_json.c | 50 ++++++++++++++++++++++ src/qemu/qemu_monitor_json.h | 3 ++ src/qemu/qemu_validate.c | 16 +++++++ src/util/virsocket.c | 83 ++++++++++++++++++++++++++++++++++++ src/util/virsocket.h | 2 + 18 files changed, 399 insertions(+), 4 deletions(-) -- 2.31.1

Added "rss" and "rss_hash_report" configuration that should be used with qemu virtio RSS. Both options are triswitches. Used as "driver" options and affects only NIC with model type "virtio". In other patches - options should turn on virtio-net RSS and hash properties. Also "rss" may used to affect "ebpf_rss_fds" property of virtio-net in qemu. Signed-off-by: Andrew Melnychenko <andrew@daynix.com> --- src/conf/domain_conf.c | 31 ++++++++++++++++++++++++++++++- src/conf/domain_conf.h | 2 ++ 2 files changed, 32 insertions(+), 1 deletion(-) diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c index 06c1fcf5e5..7289acc9c6 100644 --- a/src/conf/domain_conf.c +++ b/src/conf/domain_conf.c @@ -10244,6 +10244,8 @@ virDomainNetDefParseXML(virDomainXMLOption *xmlopt, g_autofree char *vhost_path = NULL; g_autofree char *tap = NULL; g_autofree char *vhost = NULL; + g_autofree char *virtio_rss = NULL; + g_autofree char *virtio_rss_hash_report = NULL; const char *prefix = xmlopt ? xmlopt->config.netPrefix : NULL; if (!(def = virDomainNetDefNew(xmlopt))) @@ -10385,6 +10387,8 @@ virDomainNetDefParseXML(virDomainXMLOption *xmlopt, queues = virXMLPropString(driver_node, "queues"); rx_queue_size = virXMLPropString(driver_node, "rx_queue_size"); tx_queue_size = virXMLPropString(driver_node, "tx_queue_size"); + virtio_rss = virXMLPropString(driver_node, "rss"); + virtio_rss_hash_report = virXMLPropString(driver_node, "rss_hash_report"); if ((filterref_node = virXPathNode("./filterref", ctxt))) { filter = virXMLPropString(filterref_node, "filter"); @@ -10796,7 +10800,24 @@ virDomainNetDefParseXML(virDomainXMLOption *xmlopt, } def->driver.virtio.tx_queue_size = q; } - + if (virtio_rss) { + if ((val = virTristateSwitchTypeFromString(virtio_rss)) <= 0) { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, + _("'rss' attribute must be 'on'/'off'/'default': %s"), + virtio_rss); + goto error; + } + def->driver.virtio.rss = val; + } + if (virtio_rss_hash_report) { + if ((val = 
virTristateSwitchTypeFromString(virtio_rss_hash_report)) <= 0) { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, + _("'rss_hash_report' attribute must be 'on'/'off'/'default': %s"), + virtio_rss_hash_report); + goto error; + } + def->driver.virtio.rss_hash_report = val; + } if ((tmpNode = virXPathNode("./driver/host", ctxt))) { if (virXMLPropTristateSwitch(tmpNode, "csum", VIR_XML_PROP_NONE, &def->driver.virtio.host.csum) < 0) @@ -24568,6 +24589,14 @@ virDomainVirtioNetDriverFormat(virBuffer *buf, if (def->driver.virtio.tx_queue_size) virBufferAsprintf(buf, " tx_queue_size='%u'", def->driver.virtio.tx_queue_size); + if (def->driver.virtio.rss != VIR_TRISTATE_SWITCH_ABSENT) { + virBufferAsprintf(buf, " rss='%s'", + virTristateSwitchTypeToString(def->driver.virtio.rss)); + } + if (def->driver.virtio.rss_hash_report != VIR_TRISTATE_SWITCH_ABSENT) { + virBufferAsprintf(buf, " rss_hash_report='%s'", + virTristateSwitchTypeToString(def->driver.virtio.rss_hash_report)); + } virDomainVirtioOptionsFormat(buf, def->virtio); } diff --git a/src/conf/domain_conf.h b/src/conf/domain_conf.h index ca21082624..60cb70b204 100644 --- a/src/conf/domain_conf.h +++ b/src/conf/domain_conf.h @@ -1046,6 +1046,8 @@ struct _virDomainNetDef { virTristateSwitch ecn; virTristateSwitch ufo; } guest; + virTristateSwitch rss; + virTristateSwitch rss_hash_report; } virtio; } driver; struct { -- 2.31.1

On 7/28/21 10:17 AM, Andrew Melnychenko wrote:
Added "rss" and "rss_hash_report" configuration that should be used with qemu virtio RSS. Both options are triswitches. Used as "driver" options and affects only NIC with model type "virtio". In other patches - options should turn on virtio-net RSS and hash properties. Also "rss" may used to affect "ebpf_rss_fds" property of virtio-net in qemu.
Signed-off-by: Andrew Melnychenko <andrew@daynix.com> --- src/conf/domain_conf.c | 31 ++++++++++++++++++++++++++++++- src/conf/domain_conf.h | 2 ++ 2 files changed, 32 insertions(+), 1 deletion(-)
Any change to domain XML should go hand in hand with RNG update (docs/schemas/...) and documentation (i.e. squash 10/10 into this one). Ideally, the patch would also introduce xml2xml test (in this case qemuxml2xmltest looks reasonable). Michal

Hi, I'll squash it and add tests. On Fri, Aug 20, 2021 at 3:57 PM Michal Prívozník <mprivozn@redhat.com> wrote:
On 7/28/21 10:17 AM, Andrew Melnychenko wrote:
Added "rss" and "rss_hash_report" configuration that should be used with qemu virtio RSS. Both options are triswitches. Used as "driver" options and affects only NIC with model type "virtio". In other patches - options should turn on virtio-net RSS and hash properties. Also "rss" may used to affect "ebpf_rss_fds" property of virtio-net in qemu.
Signed-off-by: Andrew Melnychenko <andrew@daynix.com> --- src/conf/domain_conf.c | 31 ++++++++++++++++++++++++++++++- src/conf/domain_conf.h | 2 ++ 2 files changed, 32 insertions(+), 1 deletion(-)
Any change to domain XML should go hand in hand with RNG update (docs/schemas/...) and documentation (i.e. squash 10/10 into this one). Ideally, the patch would also introduce xml2xml test (in this case qemuxml2xmltest looks reasonable).
Michal

Added qemu's property check for virtio-net. And added capability QEMU_CAPS_VIRTIO_RSS. With "rss" and "rss_hash_report" from domain config, qemu should enable "rss" and "hash" for virtio-net. Signed-off-by: Andrew Melnychenko <andrew@daynix.com> --- src/qemu/qemu_capabilities.c | 2 ++ src/qemu/qemu_capabilities.h | 1 + src/qemu/qemu_validate.c | 16 ++++++++++++++++ 3 files changed, 19 insertions(+) diff --git a/src/qemu/qemu_capabilities.c b/src/qemu/qemu_capabilities.c index 9558938866..7cd6dab259 100644 --- a/src/qemu/qemu_capabilities.c +++ b/src/qemu/qemu_capabilities.c @@ -637,6 +637,7 @@ VIR_ENUM_IMPL(virQEMUCaps, "confidential-guest-support", "query-display-options", "s390-pv-guest", + "virtio-net.rss", ); @@ -1421,6 +1422,7 @@ static struct virQEMUCapsDevicePropsFlags virQEMUCapsDevicePropsVirtioNet[] = { { "failover", QEMU_CAPS_VIRTIO_NET_FAILOVER, NULL }, { "packed", QEMU_CAPS_VIRTIO_PACKED_QUEUES, NULL }, { "acpi-index", QEMU_CAPS_ACPI_INDEX, NULL }, + { "rss", QEMU_CAPS_VIRTIO_RSS, NULL }, }; static struct virQEMUCapsDevicePropsFlags virQEMUCapsDevicePropsPCIeRootPort[] = { diff --git a/src/qemu/qemu_capabilities.h b/src/qemu/qemu_capabilities.h index 2b1bb57a49..6b2446fe5f 100644 --- a/src/qemu/qemu_capabilities.h +++ b/src/qemu/qemu_capabilities.h @@ -617,6 +617,7 @@ typedef enum { /* virQEMUCapsFlags grouping marker for syntax-check */ QEMU_CAPS_MACHINE_CONFIDENTAL_GUEST_SUPPORT, /* -machine confidential-guest-support */ QEMU_CAPS_QUERY_DISPLAY_OPTIONS, /* 'query-display-options' qmp command present */ QEMU_CAPS_S390_PV_GUEST, /* -object s390-pv-guest,... 
*/ + QEMU_CAPS_VIRTIO_RSS, /* virtio-net rss feature */ QEMU_CAPS_LAST /* this must always be the last item */ } virQEMUCapsFlags; diff --git a/src/qemu/qemu_validate.c b/src/qemu/qemu_validate.c index a964c8593d..265938a99f 100644 --- a/src/qemu/qemu_validate.c +++ b/src/qemu/qemu_validate.c @@ -1607,6 +1607,22 @@ qemuValidateDomainDeviceDefNetwork(const virDomainNetDef *net, } } + if (net->driver.virtio.rss && + !virQEMUCapsGet(qemuCaps, QEMU_CAPS_VIRTIO_RSS)) { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", + _("virtio rss is not supported with this " + "QEMU binary")); + return -1; + } + + if (net->driver.virtio.rss_hash_report && + !virQEMUCapsGet(qemuCaps, QEMU_CAPS_VIRTIO_RSS)) { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", + _("virtio rss hash report is not supported with this " + "QEMU binary")); + return -1; + } + if (net->mtu && !virQEMUCapsGet(qemuCaps, QEMU_CAPS_VIRTIO_NET_HOST_MTU)) { virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", -- 2.31.1

On 7/28/21 10:17 AM, Andrew Melnychenko wrote:
Added qemu's property check for virtio-net. And added capability QEMU_CAPS_VIRTIO_RSS. With "rss" and "rss_hash_report" from domain config, qemu should enable "rss" and "hash" for virtio-net.
Signed-off-by: Andrew Melnychenko <andrew@daynix.com> --- src/qemu/qemu_capabilities.c | 2 ++ src/qemu/qemu_capabilities.h | 1 + src/qemu/qemu_validate.c | 16 ++++++++++++++++ 3 files changed, 19 insertions(+)
diff --git a/src/qemu/qemu_capabilities.c b/src/qemu/qemu_capabilities.c index 9558938866..7cd6dab259 100644 --- a/src/qemu/qemu_capabilities.c +++ b/src/qemu/qemu_capabilities.c @@ -637,6 +637,7 @@ VIR_ENUM_IMPL(virQEMUCaps, "confidential-guest-support", "query-display-options", "s390-pv-guest", + "virtio-net.rss",
This needs to be updated because meanwhile we introduced comments to these enum items. Michal

Hi, Ok, I'll update in RFC v2. On Fri, Aug 20, 2021 at 3:57 PM Michal Prívozník <mprivozn@redhat.com> wrote:
On 7/28/21 10:17 AM, Andrew Melnychenko wrote:
Added qemu's property check for virtio-net. And added capability QEMU_CAPS_VIRTIO_RSS. With "rss" and "rss_hash_report" from domain config, qemu should enable "rss" and "hash" for virtio-net.
Signed-off-by: Andrew Melnychenko <andrew@daynix.com> --- src/qemu/qemu_capabilities.c | 2 ++ src/qemu/qemu_capabilities.h | 1 + src/qemu/qemu_validate.c | 16 ++++++++++++++++ 3 files changed, 19 insertions(+)
diff --git a/src/qemu/qemu_capabilities.c b/src/qemu/qemu_capabilities.c index 9558938866..7cd6dab259 100644 --- a/src/qemu/qemu_capabilities.c +++ b/src/qemu/qemu_capabilities.c @@ -637,6 +637,7 @@ VIR_ENUM_IMPL(virQEMUCaps, "confidential-guest-support", "query-display-options", "s390-pv-guest", + "virtio-net.rss",
This needs to be updated because meanwhile we introduced comments to these enum items.
Michal

If domain config contains "rss" and/or "rss_hash_report" options for driver. Also if the qemu has device capabilities for RSS. Libvirt will create an NIC device command line with enabled "rss"/"hash". Signed-off-by: Andrew Melnychenko <andrew@daynix.com> --- src/qemu/qemu_command.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c index 156af4caee..67a396a513 100644 --- a/src/qemu/qemu_command.c +++ b/src/qemu/qemu_command.c @@ -3495,6 +3495,16 @@ qemuBuildNicDevStr(virDomainDef *def, if (net->driver.virtio.tx_queue_size) virBufferAsprintf(&buf, ",tx_queue_size=%u", net->driver.virtio.tx_queue_size); + if (net->driver.virtio.rss == VIR_TRISTATE_SWITCH_ON) { + virBufferAsprintf(&buf, ",rss=%s", + virTristateSwitchTypeToString(net->driver.virtio.rss)); + } + + if (net->driver.virtio.rss_hash_report == VIR_TRISTATE_SWITCH_ON) { + virBufferAsprintf(&buf, ",hash=%s", + virTristateSwitchTypeToString(net->driver.virtio.rss_hash_report)); + } + if (net->mtu) virBufferAsprintf(&buf, ",host_mtu=%u", net->mtu); -- 2.31.1

On 7/28/21 10:17 AM, Andrew Melnychenko wrote:
If domain config contains "rss" and/or "rss_hash_report" options for driver. Also if the qemu has device capabilities for RSS. Libvirt will create an NIC device command line with enabled "rss"/"hash".
Signed-off-by: Andrew Melnychenko <andrew@daynix.com> --- src/qemu/qemu_command.c | 10 ++++++++++ 1 file changed, 10 insertions(+)
To continue my suggestion from 01/10 this could then introduce xml2argv test case. Michal

Hi, Yea, it is only RFC - later I'll add all tests for review in v2. On Fri, Aug 20, 2021 at 3:57 PM Michal Prívozník <mprivozn@redhat.com> wrote:
On 7/28/21 10:17 AM, Andrew Melnychenko wrote:
If domain config contains "rss" and/or "rss_hash_report" options for driver. Also if the qemu has device capabilities for RSS. Libvirt will create an NIC device command line with enabled "rss"/"hash".
Signed-off-by: Andrew Melnychenko <andrew@daynix.com> --- src/qemu/qemu_command.c | 10 ++++++++++ 1 file changed, 10 insertions(+)
To continue my suggestion from 01/10 this could then introduce xml2argv test case.
Michal

Similar to virSocketRecvFD() added virSocketRecvMultipleFDs(). This function returns multiple fds through unix socket. New function is required for "qemu-ebpf-rss-helper" program. The helper may pass few file descriptors - eBPF program and maps. Signed-off-by: Andrew Melnychenko <andrew@daynix.com> --- src/libvirt_private.syms | 1 + src/util/virsocket.c | 83 ++++++++++++++++++++++++++++++++++++++++ src/util/virsocket.h | 2 + 3 files changed, 86 insertions(+) diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms index 43493ea76e..6987ff00c2 100644 --- a/src/libvirt_private.syms +++ b/src/libvirt_private.syms @@ -3226,6 +3226,7 @@ virSecureEraseString; # util/virsocket.h virSocketRecvFD; virSocketSendFD; +virSocketRecvMultipleFDs; # util/virsocketaddr.h diff --git a/src/util/virsocket.c b/src/util/virsocket.c index b971da16e3..da8af42e72 100644 --- a/src/util/virsocket.c +++ b/src/util/virsocket.c @@ -486,6 +486,82 @@ virSocketRecvFD(int sock, int fdflags) return fd; } + + +/* virSocketRecvMultipleFDs receives few file descriptors through the socket. + The flags are a bitmask, possibly including O_CLOEXEC (defined in <fcntl.h>). + + Return the number of recived file descriptors on success, + or -1 with errno set in case of error. +*/ +int +virSocketRecvMultipleFDs(int sock, int *fds, size_t nfds, int fdflags) +{ + char byte = 0; + struct iovec iov; + struct msghdr msg; + int ret = -1; + ssize_t len; + struct cmsghdr *cmsg; + char buf[CMSG_SPACE(sizeof(int) * nfds)]; + int fdflags_recvmsg = fdflags & O_CLOEXEC ? 
MSG_CMSG_CLOEXEC : 0; + int fdSize = -1; + int i = 0; + int saved_errno = 0; + + if ((fdflags & ~O_CLOEXEC) != 0) { + errno = EINVAL; + return -1; + } + + /* send at least one char */ + memset(&msg, 0, sizeof(msg)); + iov.iov_base = &byte; + iov.iov_len = 1; + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + msg.msg_name = NULL; + msg.msg_namelen = 0; + + msg.msg_control = buf; + msg.msg_controllen = sizeof(buf); + + len = recvmsg(sock, &msg, fdflags_recvmsg); + if (len < 0) { + return -1; + } + + cmsg = CMSG_FIRSTHDR(&msg); + /* be paranoiac */ + if (len == 0 || cmsg == NULL || cmsg->cmsg_len < CMSG_LEN(sizeof(int)) + || cmsg->cmsg_level != SOL_SOCKET || cmsg->cmsg_type != SCM_RIGHTS) { + /* fake errno: at end the file is not available */ + errno = len ? EACCES : ENOTCONN; + return -1; + } + + fdSize = cmsg->cmsg_len - CMSG_LEN(0); + memcpy(fds, CMSG_DATA(cmsg), fdSize); + ret = fdSize/sizeof(int); + + /* set close-on-exec flag */ + if (!MSG_CMSG_CLOEXEC && (fdflags & O_CLOEXEC)) { + for (i = 0; i < ret; ++i) { + if (virSetCloseExec(fds[i]) < 0) { + saved_errno = errno; + goto error; + } + } + } + + return ret; +error: + for (i = 0; i < ret; ++i) { + VIR_FORCE_CLOSE(fds[i]); + } + errno = saved_errno; + return -1; +} #else /* WIN32 */ int virSocketSendFD(int sock G_GNUC_UNUSED, int fd G_GNUC_UNUSED) @@ -500,4 +576,11 @@ virSocketRecvFD(int sock G_GNUC_UNUSED, int fdflags G_GNUC_UNUSED) errno = ENOSYS; return -1; } + +int +virSocketRecvMultipleFDs(int sock, int *fds, size_t nfds, int fdflags) +{ + errno = ENOSYS; + return -1; +} #endif /* WIN32 */ diff --git a/src/util/virsocket.h b/src/util/virsocket.h index 419da8b3ae..c926effbc3 100644 --- a/src/util/virsocket.h +++ b/src/util/virsocket.h @@ -22,6 +22,8 @@ int virSocketSendFD(int sock, int fd); int virSocketRecvFD(int sock, int fdflags); +int +virSocketRecvMultipleFDs(int sock, int *fds, size_t nfds, int fdflags); #ifdef WIN32 -- 2.31.1

On 7/28/21 10:17 AM, Andrew Melnychenko wrote:
Similar to virSocketRecvFD() added virSocketRecvMultipleFDs(). This function returns multiple fds through unix socket. New function is required for "qemu-ebpf-rss-helper" program. The helper may pass few file descriptors - eBPF program and maps.
Signed-off-by: Andrew Melnychenko <andrew@daynix.com> --- src/libvirt_private.syms | 1 + src/util/virsocket.c | 83 ++++++++++++++++++++++++++++++++++++++++ src/util/virsocket.h | 2 + 3 files changed, 86 insertions(+)
diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms index 43493ea76e..6987ff00c2 100644 --- a/src/libvirt_private.syms +++ b/src/libvirt_private.syms @@ -3226,6 +3226,7 @@ virSecureEraseString; # util/virsocket.h virSocketRecvFD; virSocketSendFD; +virSocketRecvMultipleFDs;
This needs to be ordered. The correct order is: # util/virsocket.h virSocketRecvFD; virSocketRecvMultipleFDs; virSocketSendFD;
# util/virsocketaddr.h diff --git a/src/util/virsocket.c b/src/util/virsocket.c index b971da16e3..da8af42e72 100644 --- a/src/util/virsocket.c +++ b/src/util/virsocket.c @@ -486,6 +486,82 @@ virSocketRecvFD(int sock, int fdflags)
return fd; } + + +/* virSocketRecvMultipleFDs receives few file descriptors through the socket. + The flags are a bitmask, possibly including O_CLOEXEC (defined in <fcntl.h>). + + Return the number of recived file descriptors on success, + or -1 with errno set in case of error. +*/ +int +virSocketRecvMultipleFDs(int sock, int *fds, size_t nfds, int fdflags) +{ + char byte = 0; + struct iovec iov; + struct msghdr msg; + int ret = -1; + ssize_t len; + struct cmsghdr *cmsg; + char buf[CMSG_SPACE(sizeof(int) * nfds)]; + int fdflags_recvmsg = fdflags & O_CLOEXEC ? MSG_CMSG_CLOEXEC : 0; + int fdSize = -1; + int i = 0; + int saved_errno = 0; + + if ((fdflags & ~O_CLOEXEC) != 0) { + errno = EINVAL; + return -1; + } + + /* send at least one char */ + memset(&msg, 0, sizeof(msg)); + iov.iov_base = &byte; + iov.iov_len = 1; + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + msg.msg_name = NULL; + msg.msg_namelen = 0; + + msg.msg_control = buf; + msg.msg_controllen = sizeof(buf); + + len = recvmsg(sock, &msg, fdflags_recvmsg); + if (len < 0) { + return -1; + } + + cmsg = CMSG_FIRSTHDR(&msg); + /* be paranoiac */ + if (len == 0 || cmsg == NULL || cmsg->cmsg_len < CMSG_LEN(sizeof(int)) + || cmsg->cmsg_level != SOL_SOCKET || cmsg->cmsg_type != SCM_RIGHTS) { + /* fake errno: at end the file is not available */ + errno = len ? EACCES : ENOTCONN; + return -1; + } + + fdSize = cmsg->cmsg_len - CMSG_LEN(0); + memcpy(fds, CMSG_DATA(cmsg), fdSize); + ret = fdSize/sizeof(int);
Please put a space before and after '/'. Like this: ret = fdSize / sizeof(int);
+ + /* set close-on-exec flag */ + if (!MSG_CMSG_CLOEXEC && (fdflags & O_CLOEXEC)) { + for (i = 0; i < ret; ++i) { + if (virSetCloseExec(fds[i]) < 0) { + saved_errno = errno;
This isn't needed really, because ..
+ goto error; + } + } + } + + return ret; +error: + for (i = 0; i < ret; ++i) { + VIR_FORCE_CLOSE(fds[i]);
.. VIR_FORCE_CLOSE() doesn't change errno.
+ } + errno = saved_errno; + return -1; +}
But I wonder if this function is needed. I mean, we currently have virSocketRecvFD() and this new function looks very much like it. Would it be possible to turn virSocketRecvFD() into virSocketRecvMultipleFDs() and fix all (current) callers of virSocketRecvFD()? Alternatively, we can have virSocketRecvMultipleFDs() as you propose and then virSocketRecvFD() be just a thin wrapper over virSocketRecvMultipleFDs(), e.g. like this: virSocketRecvFD() { int fds[1]; virSocketRecvMultipleFDs(sock, fds, 1, fdflags); return fds[0]; } Or even better, have virSocketRecvFD() return FD via argument and its retval be 0/-1 (success/fail). My aim is to avoid having nearly the same code twice. Michal

Hi,
virSocketRecvFD() { int fds[1];
virSocketRecvMultipleFDs(sock, fds, 1, fdflags); return fds[0]; }
Yea, it's a good idea. On Fri, Aug 20, 2021 at 3:57 PM Michal Prívozník <mprivozn@redhat.com> wrote:
On 7/28/21 10:17 AM, Andrew Melnychenko wrote:
Similar to virSocketRecvFD() added virSocketRecvMultipleFDs(). This function returns multiple fds through unix socket. New function is required for "qemu-ebpf-rss-helper" program. The helper may pass few file descriptors - eBPF program and maps.
Signed-off-by: Andrew Melnychenko <andrew@daynix.com> --- src/libvirt_private.syms | 1 + src/util/virsocket.c | 83 ++++++++++++++++++++++++++++++++++++++++ src/util/virsocket.h | 2 + 3 files changed, 86 insertions(+)
diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms index 43493ea76e..6987ff00c2 100644 --- a/src/libvirt_private.syms +++ b/src/libvirt_private.syms @@ -3226,6 +3226,7 @@ virSecureEraseString; # util/virsocket.h virSocketRecvFD; virSocketSendFD; +virSocketRecvMultipleFDs;
This needs to be ordered. The correct order is:
# util/virsocket.h virSocketRecvFD; virSocketRecvMultipleFDs; virSocketSendFD;
# util/virsocketaddr.h diff --git a/src/util/virsocket.c b/src/util/virsocket.c index b971da16e3..da8af42e72 100644 --- a/src/util/virsocket.c +++ b/src/util/virsocket.c @@ -486,6 +486,82 @@ virSocketRecvFD(int sock, int fdflags)
return fd; } + + +/* virSocketRecvMultipleFDs receives few file descriptors through the
socket.
+ The flags are a bitmask, possibly including O_CLOEXEC (defined in <fcntl.h>). + + Return the number of recived file descriptors on success, + or -1 with errno set in case of error. +*/ +int +virSocketRecvMultipleFDs(int sock, int *fds, size_t nfds, int fdflags) +{ + char byte = 0; + struct iovec iov; + struct msghdr msg; + int ret = -1; + ssize_t len; + struct cmsghdr *cmsg; + char buf[CMSG_SPACE(sizeof(int) * nfds)]; + int fdflags_recvmsg = fdflags & O_CLOEXEC ? MSG_CMSG_CLOEXEC : 0; + int fdSize = -1; + int i = 0; + int saved_errno = 0; + + if ((fdflags & ~O_CLOEXEC) != 0) { + errno = EINVAL; + return -1; + } + + /* send at least one char */ + memset(&msg, 0, sizeof(msg)); + iov.iov_base = &byte; + iov.iov_len = 1; + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + msg.msg_name = NULL; + msg.msg_namelen = 0; + + msg.msg_control = buf; + msg.msg_controllen = sizeof(buf); + + len = recvmsg(sock, &msg, fdflags_recvmsg); + if (len < 0) { + return -1; + } + + cmsg = CMSG_FIRSTHDR(&msg); + /* be paranoiac */ + if (len == 0 || cmsg == NULL || cmsg->cmsg_len < CMSG_LEN(sizeof(int)) + || cmsg->cmsg_level != SOL_SOCKET || cmsg->cmsg_type != SCM_RIGHTS) { + /* fake errno: at end the file is not available */ + errno = len ? EACCES : ENOTCONN; + return -1; + } + + fdSize = cmsg->cmsg_len - CMSG_LEN(0); + memcpy(fds, CMSG_DATA(cmsg), fdSize); + ret = fdSize/sizeof(int);
Please put a space before and after '/'. Like this:
ret = fdSize / sizeof(int);
+ + /* set close-on-exec flag */ + if (!MSG_CMSG_CLOEXEC && (fdflags & O_CLOEXEC)) { + for (i = 0; i < ret; ++i) { + if (virSetCloseExec(fds[i]) < 0) { + saved_errno = errno;
This isn't needed really, because ..
+ goto error; + } + } + } + + return ret; +error: + for (i = 0; i < ret; ++i) { + VIR_FORCE_CLOSE(fds[i]);
.. VIR_FORCE_CLOSE() doesn't change errno.
+ } + errno = saved_errno; + return -1; +}
But I wonder if this function is needed. I mean, we currently have virSocketRecvFD() and this new function looks very much like it. Would it be possible to turn virSocketRecvFD() into virSocketRecvMultipleFDs() and fix all (current) callers of virSocketRecvFD()?
Alternatively, we can have virSocketRecvMultipleFDs() as you propose and then virSocketRecvFD() be just a thin wrapper over virSocketRecvMultipleFDs(), e.g. like this:
virSocketRecvFD() { int fds[1];
virSocketRecvMultipleFDs(sock, fds, 1, fdflags); return fds[0]; }
Or even better, have virSocketRecvFD() return FD via argument and its retval be 0/-1 (success/fail).
My aim is to avoid having nearly the same code twice.
Michal

Added check for qemu virtio-net "ebpf_rss_fds" property. This property allows to pass eBPF program/map file descriptors for RSS program. In other patches, libvirt may launch qemu-ebpf-rss-helper and pass fds to virtio-net qemu. Signed-off-by: Andrew Melnychenko <andrew@daynix.com> --- src/qemu/qemu_capabilities.c | 2 ++ src/qemu/qemu_capabilities.h | 1 + 2 files changed, 3 insertions(+) diff --git a/src/qemu/qemu_capabilities.c b/src/qemu/qemu_capabilities.c index 7cd6dab259..f16115226d 100644 --- a/src/qemu/qemu_capabilities.c +++ b/src/qemu/qemu_capabilities.c @@ -638,6 +638,7 @@ VIR_ENUM_IMPL(virQEMUCaps, "query-display-options", "s390-pv-guest", "virtio-net.rss", + "virtio-net.ebpf_rss_fds", ); @@ -1423,6 +1424,7 @@ static struct virQEMUCapsDevicePropsFlags virQEMUCapsDevicePropsVirtioNet[] = { { "packed", QEMU_CAPS_VIRTIO_PACKED_QUEUES, NULL }, { "acpi-index", QEMU_CAPS_ACPI_INDEX, NULL }, { "rss", QEMU_CAPS_VIRTIO_RSS, NULL }, + { "ebpf_rss_fds", QEMU_CAPS_VIRTIO_EBPF_RSS_FDS, NULL }, }; static struct virQEMUCapsDevicePropsFlags virQEMUCapsDevicePropsPCIeRootPort[] = { diff --git a/src/qemu/qemu_capabilities.h b/src/qemu/qemu_capabilities.h index 6b2446fe5f..c938b63e91 100644 --- a/src/qemu/qemu_capabilities.h +++ b/src/qemu/qemu_capabilities.h @@ -618,6 +618,7 @@ typedef enum { /* virQEMUCapsFlags grouping marker for syntax-check */ QEMU_CAPS_QUERY_DISPLAY_OPTIONS, /* 'query-display-options' qmp command present */ QEMU_CAPS_S390_PV_GUEST, /* -object s390-pv-guest,... */ QEMU_CAPS_VIRTIO_RSS, /* virtio-net rss feature */ + QEMU_CAPS_VIRTIO_EBPF_RSS_FDS, /* virtio-net ebpf_rss_fds feature */ QEMU_CAPS_LAST /* this must always be the last item */ } virQEMUCapsFlags; -- 2.31.1

On 7/28/21 10:17 AM, Andrew Melnychenko wrote:
Added check for qemu virtio-net "ebpf_rss_fds" property. This property allows to pass eBPF program/map file descriptors for RSS program. In other patches, libvirt may launch qemu-ebpf-rss-helper and pass fds to virtio-net qemu.
Signed-off-by: Andrew Melnychenko <andrew@daynix.com> --- src/qemu/qemu_capabilities.c | 2 ++ src/qemu/qemu_capabilities.h | 1 + 2 files changed, 3 insertions(+)
diff --git a/src/qemu/qemu_capabilities.c b/src/qemu/qemu_capabilities.c index 7cd6dab259..f16115226d 100644 --- a/src/qemu/qemu_capabilities.c +++ b/src/qemu/qemu_capabilities.c @@ -638,6 +638,7 @@ VIR_ENUM_IMPL(virQEMUCaps, "query-display-options", "s390-pv-guest", "virtio-net.rss", + "virtio-net.ebpf_rss_fds", );
@@ -1423,6 +1424,7 @@ static struct virQEMUCapsDevicePropsFlags virQEMUCapsDevicePropsVirtioNet[] = { { "packed", QEMU_CAPS_VIRTIO_PACKED_QUEUES, NULL }, { "acpi-index", QEMU_CAPS_ACPI_INDEX, NULL }, { "rss", QEMU_CAPS_VIRTIO_RSS, NULL }, + { "ebpf_rss_fds", QEMU_CAPS_VIRTIO_EBPF_RSS_FDS, NULL },
I haven't found this in QEMU. Looking into qemu-devel list it was proposed but apparently is not merged yet: https://lists.nongnu.org/archive/html/qemu-devel/2021-07/msg03536.html
};
static struct virQEMUCapsDevicePropsFlags virQEMUCapsDevicePropsPCIeRootPort[] = { diff --git a/src/qemu/qemu_capabilities.h b/src/qemu/qemu_capabilities.h index 6b2446fe5f..c938b63e91 100644 --- a/src/qemu/qemu_capabilities.h +++ b/src/qemu/qemu_capabilities.h @@ -618,6 +618,7 @@ typedef enum { /* virQEMUCapsFlags grouping marker for syntax-check */ QEMU_CAPS_QUERY_DISPLAY_OPTIONS, /* 'query-display-options' qmp command present */ QEMU_CAPS_S390_PV_GUEST, /* -object s390-pv-guest,... */ QEMU_CAPS_VIRTIO_RSS, /* virtio-net rss feature */ + QEMU_CAPS_VIRTIO_EBPF_RSS_FDS, /* virtio-net ebpf_rss_fds feature */
QEMU_CAPS_LAST /* this must always be the last item */ } virQEMUCapsFlags;

On 8/20/21 2:57 PM, Michal Prívozník wrote:
On 7/28/21 10:17 AM, Andrew Melnychenko wrote:
Added check for qemu virtio-net "ebpf_rss_fds" property. This property allows to pass eBPF program/map file descriptors for RSS program. In other patches, libvirt may launch qemu-ebpf-rss-helper and pass fds to virtio-net qemu.
Signed-off-by: Andrew Melnychenko <andrew@daynix.com> --- src/qemu/qemu_capabilities.c | 2 ++ src/qemu/qemu_capabilities.h | 1 + 2 files changed, 3 insertions(+)
diff --git a/src/qemu/qemu_capabilities.c b/src/qemu/qemu_capabilities.c index 7cd6dab259..f16115226d 100644 --- a/src/qemu/qemu_capabilities.c +++ b/src/qemu/qemu_capabilities.c @@ -638,6 +638,7 @@ VIR_ENUM_IMPL(virQEMUCaps, "query-display-options", "s390-pv-guest", "virtio-net.rss", + "virtio-net.ebpf_rss_fds", );
@@ -1423,6 +1424,7 @@ static struct virQEMUCapsDevicePropsFlags virQEMUCapsDevicePropsVirtioNet[] = { { "packed", QEMU_CAPS_VIRTIO_PACKED_QUEUES, NULL }, { "acpi-index", QEMU_CAPS_ACPI_INDEX, NULL }, { "rss", QEMU_CAPS_VIRTIO_RSS, NULL }, + { "ebpf_rss_fds", QEMU_CAPS_VIRTIO_EBPF_RSS_FDS, NULL },
I haven't found this in QEMU. Looking into qemu-devel list it was proposed but apparently is not merged yet:
https://lists.nongnu.org/archive/html/qemu-devel/2021-07/msg03536.html
Ooops, hit send too early. What I wanted to say is that it's perfectly okay to send patches before the QEMU part is merged. However, the libvirt part won't be merged before the QEMU part. We made this mistake too many times before and learned our lesson. Michal

Hi, No problem, those patches are RFC. If there are issues with the libvirt interface, the qemu patches should not be applied either. On Fri, Aug 20, 2021 at 3:59 PM Michal Prívozník <mprivozn@redhat.com> wrote:
On 8/20/21 2:57 PM, Michal Prívozník wrote:
On 7/28/21 10:17 AM, Andrew Melnychenko wrote:
Added check for qemu virtio-net "ebpf_rss_fds" property. This property allows to pass eBPF program/map file descriptors for RSS program. In other patches, libvirt may launch qemu-ebpf-rss-helper and pass fds to virtio-net qemu.
Signed-off-by: Andrew Melnychenko <andrew@daynix.com> --- src/qemu/qemu_capabilities.c | 2 ++ src/qemu/qemu_capabilities.h | 1 + 2 files changed, 3 insertions(+)
diff --git a/src/qemu/qemu_capabilities.c b/src/qemu/qemu_capabilities.c index 7cd6dab259..f16115226d 100644 --- a/src/qemu/qemu_capabilities.c +++ b/src/qemu/qemu_capabilities.c @@ -638,6 +638,7 @@ VIR_ENUM_IMPL(virQEMUCaps, "query-display-options", "s390-pv-guest", "virtio-net.rss", + "virtio-net.ebpf_rss_fds", );
@@ -1423,6 +1424,7 @@ static struct virQEMUCapsDevicePropsFlags virQEMUCapsDevicePropsVirtioNet[] = { { "packed", QEMU_CAPS_VIRTIO_PACKED_QUEUES, NULL }, { "acpi-index", QEMU_CAPS_ACPI_INDEX, NULL }, { "rss", QEMU_CAPS_VIRTIO_RSS, NULL }, + { "ebpf_rss_fds", QEMU_CAPS_VIRTIO_EBPF_RSS_FDS, NULL },
I haven't found this in QEMU. Looking into qemu-devel list it was proposed but apparently is not merged yet:
https://lists.nongnu.org/archive/html/qemu-devel/2021-07/msg03536.html
Ooops, hit send too early.
What I wanted to say is that it's perfectly okay to send patches before the QEMU part is merged. However, the libvirt part won't be merged before the QEMU part is. We made this mistake too many times before and learned our lesson.
Michal

Added qmp call to receive helper path. Also added monitor functions. For virQEMUCaps added field for helper path. Libvirt queries "query-helper-paths" from qmp and saves in qemuCaps. The qemu and the helper should be "sync" and the helper should return proper eBPF fd and maps with "interface" for current qemu. So, qemu returns where to find the helper through qmp, if the qemu supports "ebpf_rss_fds". Signed-off-by: Andrew Melnychenko <andrew@daynix.com> --- src/qemu/qemu_capabilities.c | 44 +++++++++++++++++++++++++++++++ src/qemu/qemu_capabilities.h | 3 +++ src/qemu/qemu_monitor.c | 9 +++++++ src/qemu/qemu_monitor.h | 3 +++ src/qemu/qemu_monitor_json.c | 50 ++++++++++++++++++++++++++++++++++++ src/qemu/qemu_monitor_json.h | 3 +++ 6 files changed, 112 insertions(+) diff --git a/src/qemu/qemu_capabilities.c b/src/qemu/qemu_capabilities.c index f16115226d..f62088d32f 100644 --- a/src/qemu/qemu_capabilities.c +++ b/src/qemu/qemu_capabilities.c @@ -722,6 +722,9 @@ struct _virQEMUCaps { /* Capabilities which may differ depending on the accelerator. 
*/ virQEMUCapsAccel kvm; virQEMUCapsAccel tcg; + + /* Helpers returned by qemu */ + char *helperEbpfRSS; }; struct virQEMUCapsSearchData { @@ -1994,6 +1997,8 @@ virQEMUCaps *virQEMUCapsNewCopy(virQEMUCaps *qemuCaps) qemuCaps->sevCapabilities) < 0) goto error; + ret->helperEbpfRSS = g_strdup(qemuCaps->helperEbpfRSS); + return ret; error: @@ -2037,6 +2042,8 @@ void virQEMUCapsDispose(void *obj) virQEMUCapsAccelClear(&qemuCaps->kvm); virQEMUCapsAccelClear(&qemuCaps->tcg); + + g_free(qemuCaps->helperEbpfRSS); } void @@ -4418,6 +4425,8 @@ virQEMUCapsLoadCache(virArch hostArch, if (virXPathBoolean("boolean(./kvmSupportsSecureGuest)", ctxt) > 0) qemuCaps->kvmSupportsSecureGuest = true; + qemuCaps->helperEbpfRSS = virXPathString("string(./EbpfHelperPath)", ctxt); + if (skipInvalidation) qemuCaps->invalidation = false; @@ -4671,6 +4680,10 @@ virQEMUCapsFormatCache(virQEMUCaps *qemuCaps) if (qemuCaps->kvmSupportsSecureGuest) virBufferAddLit(&buf, "<kvmSupportsSecureGuest/>\n"); + if (qemuCaps->helperEbpfRSS) { + virBufferAsprintf(&buf, "<EbpfHelperPath>%s</EbpfHelperPath>\n", qemuCaps->helperEbpfRSS); + } + virBufferAdjustIndent(&buf, -2); virBufferAddLit(&buf, "</qemuCaps>\n"); @@ -5265,6 +5278,30 @@ virQEMUCapsGetVirtType(virQEMUCaps *qemuCaps) return type; } +static int +virQEMUCapsProbeQMPHelperPath(virQEMUCaps *qemuCaps, + qemuMonitor *mon) +{ + g_autoptr(GHashTable) helperList = NULL; + const char *entry = NULL; + + helperList = qemuMonitorGetHelperPath(mon); + + if (!helperList) { + return -1; + } + + /* TODO: parse all helpers? 
*/ + entry = virHashLookup(helperList, "qemu-ebpf-rss-helper"); + if (!entry) { + return -1; + } + + qemuCaps->helperEbpfRSS = g_strdup(entry); + + return 0; +} + int virQEMUCapsInitQMPMonitor(virQEMUCaps *qemuCaps, qemuMonitor *mon) @@ -5345,6 +5382,8 @@ virQEMUCapsInitQMPMonitor(virQEMUCaps *qemuCaps, if (virQEMUCapsProbeQMPHostCPU(qemuCaps, accel, mon, type) < 0) return -1; + virQEMUCapsProbeQMPHelperPath(qemuCaps, mon); + return 0; } @@ -6459,3 +6498,8 @@ virQEMUCapsStripMachineAliases(virQEMUCaps *qemuCaps) virQEMUCapsStripMachineAliasesForVirtType(qemuCaps, VIR_DOMAIN_VIRT_KVM); virQEMUCapsStripMachineAliasesForVirtType(qemuCaps, VIR_DOMAIN_VIRT_QEMU); } + +const char *virQEMUCapsGetEBPFHelperPath(virQEMUCaps *qemuCaps) +{ + return qemuCaps->helperEbpfRSS; +} diff --git a/src/qemu/qemu_capabilities.h b/src/qemu/qemu_capabilities.h index c938b63e91..b25bdd4ec3 100644 --- a/src/qemu/qemu_capabilities.h +++ b/src/qemu/qemu_capabilities.h @@ -816,3 +816,6 @@ virQEMUCapsGetKVMSupportsSecureGuest(virQEMUCaps *qemuCaps) G_GNUC_NO_INLINE; virArch virQEMUCapsArchFromString(const char *arch); const char *virQEMUCapsArchToString(virArch arch); + +const char * +virQEMUCapsGetEBPFHelperPath(virQEMUCaps *qemuCaps); diff --git a/src/qemu/qemu_monitor.c b/src/qemu/qemu_monitor.c index 6e2d8010c5..585fb2d6b4 100644 --- a/src/qemu/qemu_monitor.c +++ b/src/qemu/qemu_monitor.c @@ -4617,3 +4617,12 @@ qemuMonitorQueryDirtyRate(qemuMonitor *mon, return qemuMonitorJSONQueryDirtyRate(mon, info); } + + +GHashTable * +qemuMonitorGetHelperPath(qemuMonitor *mon) +{ + QEMU_CHECK_MONITOR_NULL(mon); + + return qemuMonitorJSONGetHelperPath(mon); +} diff --git a/src/qemu/qemu_monitor.h b/src/qemu/qemu_monitor.h index 1491c1297c..0ee87d2c46 100644 --- a/src/qemu/qemu_monitor.h +++ b/src/qemu/qemu_monitor.h @@ -1491,3 +1491,6 @@ struct _qemuMonitorDirtyRateInfo { int qemuMonitorQueryDirtyRate(qemuMonitor *mon, qemuMonitorDirtyRateInfo *info); + +GHashTable * 
+qemuMonitorGetHelperPath(qemuMonitor *mon); diff --git a/src/qemu/qemu_monitor_json.c b/src/qemu/qemu_monitor_json.c index ca2c3bb6cf..0579eee36a 100644 --- a/src/qemu/qemu_monitor_json.c +++ b/src/qemu/qemu_monitor_json.c @@ -9387,3 +9387,53 @@ qemuMonitorJSONQueryDirtyRate(qemuMonitor *mon, return qemuMonitorJSONExtractDirtyRateInfo(data, info); } + +static int +qemuMonitorJSONGetHelperPathWorker(size_t pos G_GNUC_UNUSED, + virJSONValue *item, + void *opaque) +{ + const char *name = virJSONValueObjectGetString(item, "name"); + const char *path = virJSONValueObjectGetString(item, "path"); + GHashTable *pathsList = opaque; + + if (!name) { + virReportError(VIR_ERR_INTERNAL_ERROR, "%s", + _("reply data was missing 'name'")); + return -1; + } + + if (virHashAddEntry(pathsList, name, g_strdup(path)) < 0) + return -1; + + return 0; +} + +GHashTable * +qemuMonitorJSONGetHelperPath(qemuMonitor *mon) +{ + g_autoptr(GHashTable) pathsList = virHashNew(g_free); + g_autoptr(virJSONValue) cmd = NULL; + g_autoptr(virJSONValue) reply = NULL; + + if (!(cmd = qemuMonitorJSONMakeCommand("query-helper-paths", NULL))) + return NULL; + + if (qemuMonitorJSONCommand(mon, cmd, &reply) < 0) + return NULL; + + /* return empty hash */ + if (qemuMonitorJSONHasError(reply, "CommandNotFound")) + return g_steal_pointer(&pathsList); + + if (qemuMonitorJSONCheckReply(cmd, reply, VIR_JSON_TYPE_ARRAY) < 0) + return NULL; + + if (virJSONValueArrayForeachSteal(virJSONValueObjectGetArray(reply, "return"), + qemuMonitorJSONGetHelperPathWorker, + pathsList) < 0) + return NULL; + + return g_steal_pointer(&pathsList); +} + diff --git a/src/qemu/qemu_monitor_json.h b/src/qemu/qemu_monitor_json.h index 01a3ba25f1..80e47cf68f 100644 --- a/src/qemu/qemu_monitor_json.h +++ b/src/qemu/qemu_monitor_json.h @@ -706,3 +706,6 @@ qemuMonitorJSONStartDirtyRateCalc(qemuMonitor *mon, int qemuMonitorJSONQueryDirtyRate(qemuMonitor *mon, qemuMonitorDirtyRateInfo *info); + +GHashTable * 
+qemuMonitorJSONGetHelperPath(qemuMonitor *mon); -- 2.31.1

On 7/28/21 10:17 AM, Andrew Melnychenko wrote:
Added qmp call to receive helper path. Also added monitor functions. For virQEMUCaps added field for helper path. Libvirt queries "query-helper-paths" from qmp and saves in qemuCaps. The qemu and the helper should be "sync" and the helper should return proper eBPF fd and maps with "interface" for current qemu. So, qemu returns where to find the helper through qmp, if the qemu supports "ebpf_rss_fds".
Signed-off-by: Andrew Melnychenko <andrew@daynix.com> --- src/qemu/qemu_capabilities.c | 44 +++++++++++++++++++++++++++++++ src/qemu/qemu_capabilities.h | 3 +++ src/qemu/qemu_monitor.c | 9 +++++++ src/qemu/qemu_monitor.h | 3 +++ src/qemu/qemu_monitor_json.c | 50 ++++++++++++++++++++++++++++++++++++ src/qemu/qemu_monitor_json.h | 3 +++ 6 files changed, 112 insertions(+)
diff --git a/src/qemu/qemu_capabilities.c b/src/qemu/qemu_capabilities.c index f16115226d..f62088d32f 100644 --- a/src/qemu/qemu_capabilities.c +++ b/src/qemu/qemu_capabilities.c @@ -722,6 +722,9 @@ struct _virQEMUCaps { /* Capabilities which may differ depending on the accelerator. */ virQEMUCapsAccel kvm; virQEMUCapsAccel tcg; + + /* Helpers returned by qemu */ + char *helperEbpfRSS; };
This is not the usual way we store paths for QEMU helpers. Conceptually this looks similar to qemu-bridge-helper. I suggest you take a look at how our code deals with that binary. QEMU_BRIDGE_HELPER and bridgeHelperName look like good keywords to git grep for. Michal

Hi, Well, qemu-ebpf-rss-helper is kinda unique for each qemu emulator, that's why it's stored with qemucaps. On Fri, Aug 20, 2021 at 3:57 PM Michal Prívozník <mprivozn@redhat.com> wrote:
On 7/28/21 10:17 AM, Andrew Melnychenko wrote:
Added qmp call to receive helper path. Also added monitor functions. For virQEMUCaps added field for helper path. Libvirt queries "query-helper-paths" from qmp and saves in qemuCaps. The qemu and the helper should be "sync" and the helper should return proper eBPF fd and maps with "interface" for current qemu. So, qemu returns where to find the helper through qmp, if the qemu supports "ebpf_rss_fds".
Signed-off-by: Andrew Melnychenko <andrew@daynix.com> --- src/qemu/qemu_capabilities.c | 44 +++++++++++++++++++++++++++++++ src/qemu/qemu_capabilities.h | 3 +++ src/qemu/qemu_monitor.c | 9 +++++++ src/qemu/qemu_monitor.h | 3 +++ src/qemu/qemu_monitor_json.c | 50 ++++++++++++++++++++++++++++++++++++ src/qemu/qemu_monitor_json.h | 3 +++ 6 files changed, 112 insertions(+)
diff --git a/src/qemu/qemu_capabilities.c b/src/qemu/qemu_capabilities.c index f16115226d..f62088d32f 100644 --- a/src/qemu/qemu_capabilities.c +++ b/src/qemu/qemu_capabilities.c @@ -722,6 +722,9 @@ struct _virQEMUCaps { /* Capabilities which may differ depending on the accelerator. */ virQEMUCapsAccel kvm; virQEMUCapsAccel tcg; + + /* Helpers returned by qemu */ + char *helperEbpfRSS; };
This is not the usual way we store paths for QEMU helpers. Conceptually this looks similar to qemu-bridge-helper. I suggest you take a look at how our code deals with that binary. QEMU_BRIDGE_HELPER and bridgeHelperName look like good keywords to git grep for.
Michal

New function to call "qemu-ebpf-rss-helper". The helper passes few fds through unix socket. Technically libvirt should not be aware how many and what those fds. The helper should return fds that should be passed to the qemu as is and in same order. Signed-off-by: Andrew Melnychenko <andrew@daynix.com> --- src/qemu/qemu_interface.c | 54 +++++++++++++++++++++++++++++++++++++++ src/qemu/qemu_interface.h | 2 ++ 2 files changed, 56 insertions(+) diff --git a/src/qemu/qemu_interface.c b/src/qemu/qemu_interface.c index ac0168c80d..76435af94e 100644 --- a/src/qemu/qemu_interface.c +++ b/src/qemu/qemu_interface.c @@ -777,3 +777,57 @@ qemuInterfaceOpenVhostNet(virDomainDef *def, return -1; } + + +int qemuEbpfRssHelper(const char *helper, int *fds, int nfds) +{ + int ret = 0; + int err = 0; + int unix_fds[2] = { -1, -1 }; + virCommand *cmd = NULL; + + if (!helper || !fds || !nfds) { + return -1; + } + + ret = socketpair(AF_UNIX, SOCK_STREAM, 0, unix_fds); + if (ret) { + virReportSystemError(errno, "%s", _("failed to create socket")); + return -1; + } + + cmd = virCommandNew(helper); + if (cmd == NULL) { + VIR_FORCE_CLOSE(unix_fds[1]); + ret = -1; + goto cleanup; + } + virCommandAddArgFormat(cmd, "--fd=%d", unix_fds[1]); + virCommandPassFD(cmd, unix_fds[1], VIR_COMMAND_PASS_FD_CLOSE_PARENT); + virCommandDoAsyncIO(cmd); + + if (virCommandRunAsync(cmd, NULL) < 0) { + VIR_FORCE_CLOSE(unix_fds[1]); + ret = -1; + goto cleanup; + } + + memset(fds, 0, sizeof(*fds) * nfds); + + ret = virSocketRecvMultipleFDs(unix_fds[0], fds, nfds, 0); + + if (virCommandWait(cmd, &err) < 0) { + int i = 0; + for (; i < ret; ++i) { + if (fds[i]) { + VIR_FORCE_CLOSE(fds[i]); + } + } + ret = -1; + } + +cleanup: + VIR_FORCE_CLOSE(unix_fds[0]); + virCommandFree(cmd); + return ret; +} diff --git a/src/qemu/qemu_interface.h b/src/qemu/qemu_interface.h index 438d548065..63d7590035 100644 --- a/src/qemu/qemu_interface.h +++ b/src/qemu/qemu_interface.h @@ -61,3 +61,5 @@ int 
qemuInterfacePrepareSlirp(virQEMUDriver *driver, qemuSlirp **slirp); int qemuInterfaceVDPAConnect(virDomainNetDef *net) G_GNUC_NO_INLINE; + +int qemuEbpfRssHelper(const char *helper, int *fds, int nfds); -- 2.31.1

On 7/28/21 10:17 AM, Andrew Melnychenko wrote:
New function to call "qemu-ebpf-rss-helper". The helper passes few fds through unix socket. Technically libvirt should not be aware how many and what those fds. The helper should return fds that should be passed to the qemu as is and in same order.
Signed-off-by: Andrew Melnychenko <andrew@daynix.com> --- src/qemu/qemu_interface.c | 54 +++++++++++++++++++++++++++++++++++++++ src/qemu/qemu_interface.h | 2 ++ 2 files changed, 56 insertions(+)
diff --git a/src/qemu/qemu_interface.c b/src/qemu/qemu_interface.c index ac0168c80d..76435af94e 100644 --- a/src/qemu/qemu_interface.c +++ b/src/qemu/qemu_interface.c @@ -777,3 +777,57 @@ qemuInterfaceOpenVhostNet(virDomainDef *def,
return -1; } + + +int qemuEbpfRssHelper(const char *helper, int *fds, int nfds) +{ + int ret = 0; + int err = 0; + int unix_fds[2] = { -1, -1 }; + virCommand *cmd = NULL;
If you'd use: g_autoptr(virCommand) cmd = NULL; then you can ditch that explicit virCommandFree() call below.
+ + if (!helper || !fds || !nfds) { + return -1; + } + + ret = socketpair(AF_UNIX, SOCK_STREAM, 0, unix_fds); + if (ret) { + virReportSystemError(errno, "%s", _("failed to create socket")); + return -1; + } + + cmd = virCommandNew(helper); + if (cmd == NULL) {
This can't really happen. Also, all virCommand* APIs are prepared for this situation so that we can write simpler code: cmd = virCommandNew(); virCommandSomething(cmd); virCommandSomethingElse(cmd); if (virCommandRunAsync(cmd) < 0) /* This is the point where we learn about any previous error */
+ VIR_FORCE_CLOSE(unix_fds[1]); + ret = -1; + goto cleanup; + } + virCommandAddArgFormat(cmd, "--fd=%d", unix_fds[1]); + virCommandPassFD(cmd, unix_fds[1], VIR_COMMAND_PASS_FD_CLOSE_PARENT); + virCommandDoAsyncIO(cmd); + + if (virCommandRunAsync(cmd, NULL) < 0) { + VIR_FORCE_CLOSE(unix_fds[1]);
This doesn't look right. Even in case of failure virCommandRun* should have closed all CLOSE_PARENT FDs. So this is effectively a double close.
+ ret = -1; + goto cleanup; + } + + memset(fds, 0, sizeof(*fds) * nfds); + + ret = virSocketRecvMultipleFDs(unix_fds[0], fds, nfds, 0); + + if (virCommandWait(cmd, &err) < 0) { + int i = 0; + for (; i < ret; ++i) { + if (fds[i]) {
This check seems needless. After successful return from virSocketRecvMultipleFDs() the @fds array must be filled with only valid FDs.
+ VIR_FORCE_CLOSE(fds[i]); + } + } + ret = -1; + } + +cleanup: + VIR_FORCE_CLOSE(unix_fds[0]); + virCommandFree(cmd); + return ret; +} diff --git a/src/qemu/qemu_interface.h b/src/qemu/qemu_interface.h index 438d548065..63d7590035 100644 --- a/src/qemu/qemu_interface.h +++ b/src/qemu/qemu_interface.h @@ -61,3 +61,5 @@ int qemuInterfacePrepareSlirp(virQEMUDriver *driver, qemuSlirp **slirp);
int qemuInterfaceVDPAConnect(virDomainNetDef *net) G_GNUC_NO_INLINE; + +int qemuEbpfRssHelper(const char *helper, int *fds, int nfds);
Michal

The helper called before NIC string creation. EBPF fds passed to the child process. The helper called if qemu supports "ebpf_rss_fds" and returns the helper path. If libvirt can't retrieve fds from the helper, qemu will be launched without "ebpf_rss_fds" virtio-net property. Signed-off-by: Andrew Melnychenko <andrew@daynix.com> --- src/qemu/qemu_command.c | 36 +++++++++++++++++++++++++++++++++++- src/qemu/qemu_command.h | 2 ++ 2 files changed, 37 insertions(+), 1 deletion(-) diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c index 67a396a513..2fffcee609 100644 --- a/src/qemu/qemu_command.c +++ b/src/qemu/qemu_command.c @@ -3377,11 +3377,14 @@ qemuBuildNicDevStr(virDomainDef *def, virDomainNetDef *net, unsigned int bootindex, size_t vhostfdSize, + char **ebpfRSSfds, + size_t ebpfRSSnfds, virQEMUCaps *qemuCaps) { g_auto(virBuffer) buf = VIR_BUFFER_INITIALIZER; bool usingVirtio = false; char macaddr[VIR_MAC_STRING_BUFLEN]; + size_t i = 0; if (virDomainNetIsVirtioModel(net)) { if (qemuBuildVirtioDevStr(&buf, "virtio-net", qemuCaps, @@ -3498,6 +3501,15 @@ qemuBuildNicDevStr(virDomainDef *def, if (net->driver.virtio.rss == VIR_TRISTATE_SWITCH_ON) { virBufferAsprintf(&buf, ",rss=%s", virTristateSwitchTypeToString(net->driver.virtio.rss)); + + if (ebpfRSSfds != NULL && ebpfRSSnfds) + { + virBufferAsprintf(&buf, ",ebpf_rss_fds="); + for (i = 0; i < (ebpfRSSnfds - 1); ++i) { + virBufferAsprintf(&buf, "%s:",ebpfRSSfds[i]); + } + virBufferAsprintf(&buf, "%s",ebpfRSSfds[ebpfRSSnfds - 1]); + } } if (net->driver.virtio.rss_hash_report == VIR_TRISTATE_SWITCH_ON) { @@ -8492,6 +8504,9 @@ qemuBuildInterfaceCommandLine(virQEMUDriver *driver, qemuSlirp *slirp; size_t i; g_autoptr(virJSONValue) hostnetprops = NULL; + char **ebpfRSSfdsName = NULL; + int ebpfRSSfds[16] = {}; + int ebpfRSSnfds = 0; if (!bootindex) @@ -8744,8 +8759,20 @@ qemuBuildInterfaceCommandLine(virQEMUDriver *driver, if (qemuCommandAddExtDevice(cmd, &net->info) < 0) goto cleanup; + if 
(net->driver.virtio.rss == VIR_TRISTATE_SWITCH_ON + && virQEMUCapsGet(qemuCaps, QEMU_CAPS_VIRTIO_EBPF_RSS_FDS)) { + ebpfRSSnfds = qemuEbpfRssHelper(virQEMUCapsGetEBPFHelperPath(qemuCaps), ebpfRSSfds, 16); + if (ebpfRSSnfds > 0) { + ebpfRSSfdsName = g_new0(char *, ebpfRSSnfds); + for (i = 0; i < ebpfRSSnfds; ++i) { + ebpfRSSfdsName[i] = g_strdup_printf("%d", ebpfRSSfds[i]); + virCommandPassFD(cmd, ebpfRSSfds[i], VIR_COMMAND_PASS_FD_CLOSE_PARENT); + } + } + } + if (!(nic = qemuBuildNicDevStr(def, net, bootindex, - net->driver.virtio.queues, qemuCaps))) + net->driver.virtio.queues, ebpfRSSfdsName, ebpfRSSnfds, qemuCaps))) goto cleanup; virCommandAddArgList(cmd, "-device", nic, NULL); } else if (!requireNicdev) { @@ -8785,6 +8812,13 @@ qemuBuildInterfaceCommandLine(virQEMUDriver *driver, } VIR_FREE(tapfdName); VIR_FREE(vhostfd); + for (i = 0; ret < 0 && ebpfRSSfds[i] && i < 16; i++) { + if (ret < 0) + VIR_FORCE_CLOSE(ebpfRSSfds[i]); + if (ebpfRSSfdsName) + VIR_FREE(ebpfRSSfdsName[i]); + } + VIR_FREE(ebpfRSSfdsName); VIR_FREE(tapfd); VIR_FORCE_CLOSE(vdpafd); return ret; diff --git a/src/qemu/qemu_command.h b/src/qemu/qemu_command.h index 188e63ea1f..cab0bb7770 100644 --- a/src/qemu/qemu_command.h +++ b/src/qemu/qemu_command.h @@ -107,6 +107,8 @@ char *qemuBuildNicDevStr(virDomainDef *def, virDomainNetDef *net, unsigned int bootindex, size_t vhostfdSize, + char **ebpf_rss_fds, + size_t nfds, virQEMUCaps *qemuCaps); char *qemuDeviceDriveHostAlias(virDomainDiskDef *disk); -- 2.31.1

On 7/28/21 10:17 AM, Andrew Melnychenko wrote:
The helper called before NIC string creation. EBPF fds passed to the child process. The helper called if qemu supports "ebpf_rss_fds" and returns the helper path. If libvirt can't retrieve fds from the helper, qemu will be launched without "ebpf_rss_fds" virtio-net property.
Signed-off-by: Andrew Melnychenko <andrew@daynix.com> --- src/qemu/qemu_command.c | 36 +++++++++++++++++++++++++++++++++++- src/qemu/qemu_command.h | 2 ++ 2 files changed, 37 insertions(+), 1 deletion(-)
diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c index 67a396a513..2fffcee609 100644 --- a/src/qemu/qemu_command.c +++ b/src/qemu/qemu_command.c @@ -3377,11 +3377,14 @@ qemuBuildNicDevStr(virDomainDef *def, virDomainNetDef *net, unsigned int bootindex, size_t vhostfdSize, + char **ebpfRSSfds, + size_t ebpfRSSnfds, virQEMUCaps *qemuCaps) { g_auto(virBuffer) buf = VIR_BUFFER_INITIALIZER; bool usingVirtio = false; char macaddr[VIR_MAC_STRING_BUFLEN]; + size_t i = 0;
if (virDomainNetIsVirtioModel(net)) { if (qemuBuildVirtioDevStr(&buf, "virtio-net", qemuCaps, @@ -3498,6 +3501,15 @@ qemuBuildNicDevStr(virDomainDef *def, if (net->driver.virtio.rss == VIR_TRISTATE_SWITCH_ON) { virBufferAsprintf(&buf, ",rss=%s", virTristateSwitchTypeToString(net->driver.virtio.rss)); + + if (ebpfRSSfds != NULL && ebpfRSSnfds) + {
I think it's sufficient to check just for ebpfRSSnfds > 0 because it can't happen that these two variables disagree.
+ virBufferAsprintf(&buf, ",ebpf_rss_fds="); + for (i = 0; i < (ebpfRSSnfds - 1); ++i) { + virBufferAsprintf(&buf, "%s:",ebpfRSSfds[i]); + } + virBufferAsprintf(&buf, "%s",ebpfRSSfds[ebpfRSSnfds - 1]); + } }
if (net->driver.virtio.rss_hash_report == VIR_TRISTATE_SWITCH_ON) { @@ -8492,6 +8504,9 @@ qemuBuildInterfaceCommandLine(virQEMUDriver *driver, qemuSlirp *slirp; size_t i; g_autoptr(virJSONValue) hostnetprops = NULL; + char **ebpfRSSfdsName = NULL; + int ebpfRSSfds[16] = {};
This constant looks like a magic number.
+ int ebpfRSSnfds = 0;
if (!bootindex) @@ -8744,8 +8759,20 @@ qemuBuildInterfaceCommandLine(virQEMUDriver *driver, if (qemuCommandAddExtDevice(cmd, &net->info) < 0) goto cleanup;
+ if (net->driver.virtio.rss == VIR_TRISTATE_SWITCH_ON + && virQEMUCapsGet(qemuCaps, QEMU_CAPS_VIRTIO_EBPF_RSS_FDS)) { + ebpfRSSnfds = qemuEbpfRssHelper(virQEMUCapsGetEBPFHelperPath(qemuCaps), ebpfRSSfds, 16); + if (ebpfRSSnfds > 0) { + ebpfRSSfdsName = g_new0(char *, ebpfRSSnfds); + for (i = 0; i < ebpfRSSnfds; ++i) { + ebpfRSSfdsName[i] = g_strdup_printf("%d", ebpfRSSfds[i]); + virCommandPassFD(cmd, ebpfRSSfds[i], VIR_COMMAND_PASS_FD_CLOSE_PARENT); + } + } + } + if (!(nic = qemuBuildNicDevStr(def, net, bootindex, - net->driver.virtio.queues, qemuCaps))) + net->driver.virtio.queues, ebpfRSSfdsName, ebpfRSSnfds, qemuCaps))) goto cleanup; virCommandAddArgList(cmd, "-device", nic, NULL); } else if (!requireNicdev) { @@ -8785,6 +8812,13 @@ qemuBuildInterfaceCommandLine(virQEMUDriver *driver, } VIR_FREE(tapfdName); VIR_FREE(vhostfd); + for (i = 0; ret < 0 && ebpfRSSfds[i] && i < 16; i++) { + if (ret < 0) + VIR_FORCE_CLOSE(ebpfRSSfds[i]);
I think this ret < 0 check is duplicated. Also, we usually write it like this: if (ret < 0) { for () ; /* other cleanup code goes here */ } Michal

The helper called before NIC string creation. eBPF fds passed to the child process through qmp. Qemu should support "ebpf_rss_fds" and return path to the helper. Also added check for device "update". Signed-off-by: Andrew Melnychenko <andrew@daynix.com> --- src/qemu/qemu_hotplug.c | 30 ++++++++++++++++++++++++++++-- 1 file changed, 28 insertions(+), 2 deletions(-) diff --git a/src/qemu/qemu_hotplug.c b/src/qemu/qemu_hotplug.c index 6743a8a742..6dd8e7dad4 100644 --- a/src/qemu/qemu_hotplug.c +++ b/src/qemu/qemu_hotplug.c @@ -1205,6 +1205,9 @@ qemuDomainAttachNetDevice(virQEMUDriver *driver, g_autofree char *netdev_name = NULL; g_autoptr(virConnect) conn = NULL; virErrorPtr save_err = NULL; + char **ebpfRSSfdsName = NULL; + int ebpfRSSfds[16] = {}; + int ebpfRSSnfds = 0; /* If appropriate, grab a physical device from the configured * network's pool of devices, or resolve bridge device name @@ -1495,8 +1498,23 @@ qemuDomainAttachNetDevice(virQEMUDriver *driver, for (i = 0; i < vhostfdSize; i++) VIR_FORCE_CLOSE(vhostfd[i]); + if (net->driver.virtio.rss == VIR_TRISTATE_SWITCH_ON + && virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_VIRTIO_EBPF_RSS_FDS)) { + ebpfRSSnfds = qemuEbpfRssHelper(virQEMUCapsGetEBPFHelperPath(priv->qemuCaps), ebpfRSSfds, 16); + if (ebpfRSSnfds > 0) { + ebpfRSSfdsName = g_new0(char *, ebpfRSSnfds); + for (i = 0; i < ebpfRSSnfds; ++i) { + ebpfRSSfdsName[i] = g_strdup_printf("ebpfrssfd-%s%zu", net->info.alias, i); + if (qemuMonitorSendFileHandle(priv->mon, NULL, ebpfRSSfds[i])) { + ebpfRSSnfds = 0; + break; + } + } + } + } + if (!(nicstr = qemuBuildNicDevStr(vm->def, net, 0, - queueSize, priv->qemuCaps))) + queueSize, ebpfRSSfdsName, ebpfRSSnfds, priv->qemuCaps))) goto try_remove; qemuDomainObjEnterMonitor(driver, vm); @@ -1599,6 +1617,12 @@ qemuDomainAttachNetDevice(virQEMUDriver *driver, } VIR_FREE(vhostfd); VIR_FREE(vhostfdName); + for (i = 0; ret < 0 && ebpfRSSfds[i] && i < 16; i++) { + VIR_FORCE_CLOSE(ebpfRSSfds[i]); + if (ebpfRSSfdsName) + 
VIR_FREE(ebpfRSSfdsName[i]); + } + VIR_FREE(ebpfRSSfdsName); virDomainCCWAddressSetFree(ccwaddrs); VIR_FORCE_CLOSE(slirpfd); VIR_FORCE_CLOSE(vdpafd); @@ -3624,7 +3648,9 @@ qemuDomainChangeNet(virQEMUDriver *driver, olddev->driver.virtio.guest.tso4 != newdev->driver.virtio.guest.tso4 || olddev->driver.virtio.guest.tso6 != newdev->driver.virtio.guest.tso6 || olddev->driver.virtio.guest.ecn != newdev->driver.virtio.guest.ecn || - olddev->driver.virtio.guest.ufo != newdev->driver.virtio.guest.ufo)) { + olddev->driver.virtio.guest.ufo != newdev->driver.virtio.guest.ufo || + olddev->driver.virtio.rss != newdev->driver.virtio.rss || + olddev->driver.virtio.rss_hash_report != newdev->driver.virtio.rss_hash_report)) { virReportError(VIR_ERR_OPERATION_UNSUPPORTED, "%s", _("cannot modify virtio network device driver attributes")); goto cleanup; -- 2.31.1

Signed-off-by: Andrew Melnychenko <andrew@daynix.com> --- docs/formatdomain.rst | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/docs/formatdomain.rst b/docs/formatdomain.rst index 61ccd8895a..22ad19439f 100644 --- a/docs/formatdomain.rst +++ b/docs/formatdomain.rst @@ -5233,6 +5233,22 @@ following attributes are available for the ``"virtio"`` NIC driver: only for ``vhostuser`` type. :since:`Since 3.7.0 (QEMU and KVM only)` **In general you should leave this option alone, unless you are very certain you know what you are doing.** +``rss`` + The ``rss`` option enables in-qemu/ebpf RSS for virtio NIC. RSS works with + virtio and tap backends only. For ebpf RSS the "ebpf helper" is used. Helper + binary path received from qemu by qmp. The helper is't used if qemu doesn't + provide "ebpf_rss_fds" property or helper path. Without helper, virtio NIC + will be launched only with "rss" property. Qemu may load eBPF by itself if it + has CAP_SYS_ADMIN permissions. In other cases, "in-qemu" RSS is used. + **In general you should leave this option alone, unless you are very certain + you know what you are doing.** +``rss_hash_report`` + The ``rss_hash_report`` option enables in-qemu RSS hash report for virtio + NIC. Ebpf RSS doesn't support hash report yet. Usually enabled alongside with + ``rss``. Without ``rss`` option, the hash report doesn't affect steering + itself but provides vnet header with a calculated hash. + **In general you should leave this option alone, unless you are very certain + you know what you are doing.** virtio options For virtio interfaces, `Virtio-specific options <#elementsVirtio>`__ can also be set. ( :since:`Since 3.5.0` ) -- 2.31.1

Ping On Wed, Jul 28, 2021 at 11:17 AM Andrew Melnychenko <andrew@daynix.com> wrote:
This series of patches add RSS property support for virtio-net-pci.
Virtio RSS effectively works with TAP devices, it requires additional vectors for VirtioNet, queues for TAP device, and vCPU cores. Example of device configuration: ``` <interface type="network"> <mac address="52:54:00:c4:90:25"/> <source network="default"/> <model type="virtio"/> <driver name="qemu" queues="9" rss="on" rss_hash_report="off"/> <address type="pci" domain="0x0000" bus="0x01" slot="0x00" function="0x0"/> </interface> ```
Capability "rss" enables RSS, "rss_hash_report" - enables hashes in vheader. Qemu uses eBPF program as RSS driver. For loading RSS eBPF program, the helper is used. Path to the helper is provided by Qemu through "query-helper-paths" qmp command. The helper "qemu-ebpf-rss-helper" is built with Qemu and may differ from build to build. So it's required that the Qemu should provide a proper helper path. Libvirt would call the helper and receive the program and map fd through unix socket. Fds would be passed to Qemu in "ebpf_rss_fds" property by passing to child process or unix socket. If libvirt would fail at helper call or Qemu didn't provide the path, the Qemu would be launched without "ebpf_rss_fds" property. Without "ebpf_rss_fds" property, Qemu would try to load eBPF program by itself - usually, it would require additional system permissions. Qemu may use "in-qemu" RSS as a fallback option, which will not require system permissions, but doesn't work with vhost TAP.
Qemu patches: https://lists.nongnu.org/archive/html/qemu-devel/2021-07/msg03535.html
Andrew Melnychenko (10): domain_conf: Added configs for RSS and Hash report. qemu_capabilities: Added capabilites for qemu's "rss" and "hash". qemu_command: Added "rss" and "hash" properties. virsocket: Added receive for multiple fds. qemu_capabilities: Added capability for qemu's "ebpf_rss_fds". qemu_capabilities: Added capability for ebpf helper path. qemu_interface: Added ebpf helper call. qemu_command: Added ebpf RSS helper call for NIC creation. qemu_hotplug: Added helper call for hotplug NIC. docs: Added descriptions for "rss" and "rss_hash_report" configurations.
docs/formatdomain.rst | 16 +++++++ src/conf/domain_conf.c | 31 +++++++++++++- src/conf/domain_conf.h | 2 + src/libvirt_private.syms | 1 + src/qemu/qemu_capabilities.c | 48 +++++++++++++++++++++ src/qemu/qemu_capabilities.h | 5 +++ src/qemu/qemu_command.c | 46 +++++++++++++++++++- src/qemu/qemu_command.h | 2 + src/qemu/qemu_hotplug.c | 30 ++++++++++++- src/qemu/qemu_interface.c | 54 +++++++++++++++++++++++ src/qemu/qemu_interface.h | 2 + src/qemu/qemu_monitor.c | 9 ++++ src/qemu/qemu_monitor.h | 3 ++ src/qemu/qemu_monitor_json.c | 50 ++++++++++++++++++++++ src/qemu/qemu_monitor_json.h | 3 ++ src/qemu/qemu_validate.c | 16 +++++++ src/util/virsocket.c | 83 ++++++++++++++++++++++++++++++++++++ src/util/virsocket.h | 2 + 18 files changed, 399 insertions(+), 4 deletions(-)
-- 2.31.1

Hi, Andrew. We in Virtuozzo are interested in this functionality too. Do you plan to continue your work on it? Nikolay пн, 16 авг. 2021 г. в 15:00, Andrew Melnichenko <andrew@daynix.com>:
Ping
On Wed, Jul 28, 2021 at 11:17 AM Andrew Melnychenko <andrew@daynix.com> wrote:
This series of patches add RSS property support for virtio-net-pci.
Virtio RSS effectively works with TAP devices, it requires additional vectors for VirtioNet, queues for TAP device, and vCPU cores. Example of device configuration: ``` <interface type="network"> <mac address="52:54:00:c4:90:25"/> <source network="default"/> <model type="virtio"/> <driver name="qemu" queues="9" rss="on" rss_hash_report="off"/> <address type="pci" domain="0x0000" bus="0x01" slot="0x00" function="0x0"/> </interface> ```
Capability "rss" enables RSS, "rss_hash_report" - enables hashes in vheader. Qemu uses eBPF program as RSS driver. For loading RSS eBPF program, the helper is used. Path to the helper is provided by Qemu through "query-helper-paths" qmp command. The helper "qemu-ebpf-rss-helper" is built with Qemu and may differ from build to build. So it's required that the Qemu should provide a proper helper path. Libvirt would call the helper and receive the program and map fd through unix socket. Fds would be passed to Qemu in "ebpf_rss_fds" property by passing to child process or unix socket. If libvirt would fail at helper call or Qemu didn't provide the path, the Qemu would be launched without "ebpf_rss_fds" property. Without "ebpf_rss_fds" property, Qemu would try to load eBPF program by itself - usually, it would require additional system permissions. Qemu may use "in-qemu" RSS as a fallback option, which will not require system permissions, but doesn't work with vhost TAP.
Qemu patches: https://lists.nongnu.org/archive/html/qemu-devel/2021-07/msg03535.html
Andrew Melnychenko (10): domain_conf: Added configs for RSS and Hash report. qemu_capabilities: Added capabilites for qemu's "rss" and "hash". qemu_command: Added "rss" and "hash" properties. virsocket: Added receive for multiple fds. qemu_capabilities: Added capability for qemu's "ebpf_rss_fds". qemu_capabilities: Added capability for ebpf helper path. qemu_interface: Added ebpf helper call. qemu_command: Added ebpf RSS helper call for NIC creation. qemu_hotplug: Added helper call for hotplug NIC. docs: Added descriptions for "rss" and "rss_hash_report" configurations.
docs/formatdomain.rst        | 16 +++++++
src/conf/domain_conf.c       | 31 +++++++++++++-
src/conf/domain_conf.h       |  2 +
src/libvirt_private.syms     |  1 +
src/qemu/qemu_capabilities.c | 48 +++++++++++++++++++++
src/qemu/qemu_capabilities.h |  5 +++
src/qemu/qemu_command.c      | 46 +++++++++++++++++++-
src/qemu/qemu_command.h      |  2 +
src/qemu/qemu_hotplug.c      | 30 ++++++++++++-
src/qemu/qemu_interface.c    | 54 +++++++++++++++++++++++
src/qemu/qemu_interface.h    |  2 +
src/qemu/qemu_monitor.c      |  9 ++++
src/qemu/qemu_monitor.h      |  3 ++
src/qemu/qemu_monitor_json.c | 50 ++++++++++++++++++++++
src/qemu/qemu_monitor_json.h |  3 ++
src/qemu/qemu_validate.c     | 16 +++++++
src/util/virsocket.c         | 83 ++++++++++++++++++++++++++++++++++++
src/util/virsocket.h         |  2 +
18 files changed, 399 insertions(+), 4 deletions(-)
-- 2.31.1

Hi,
Yes, the work is in progress. I'm now working on a proper solution for the eBPF RSS helper.
On Wed, Oct 20, 2021 at 3:23 PM Nikolay Shirokovskiy <nshirokovskiy@virtuozzo.com> wrote:
Hi, Andrew.
We at Virtuozzo are interested in this functionality too. Do you plan to continue your work on it?
Nikolay
пн, 16 авг. 2021 г. в 15:00, Andrew Melnichenko <andrew@daynix.com>:
Ping
On Wed, Jul 28, 2021 at 11:17 AM Andrew Melnychenko <andrew@daynix.com> wrote:
This series of patches adds RSS property support for virtio-net-pci.
Virtio RSS works effectively with TAP devices; it requires additional vectors for VirtioNet, queues for the TAP device, and vCPU cores. Example of device configuration:

```
<interface type="network">
  <mac address="52:54:00:c4:90:25"/>
  <source network="default"/>
  <model type="virtio"/>
  <driver name="qemu" queues="9" rss="on" rss_hash_report="off"/>
  <address type="pci" domain="0x0000" bus="0x01" slot="0x00" function="0x0"/>
</interface>
```
The "rss" capability enables RSS, and "rss_hash_report" enables hashes in the vheader. Qemu uses an eBPF program as the RSS driver. A helper is used to load the RSS eBPF program. The path to the helper is provided by Qemu through the "query-helper-paths" QMP command. The helper "qemu-ebpf-rss-helper" is built with Qemu and may differ from build to build, so Qemu must provide the proper helper path. Libvirt calls the helper and receives the program and map fds through a unix socket. The fds are then passed to Qemu in the "ebpf_rss_fds" property, either by passing them to the child process or over a unix socket. If libvirt fails to call the helper, or Qemu did not provide the path, Qemu is launched without the "ebpf_rss_fds" property. Without the "ebpf_rss_fds" property, Qemu tries to load the eBPF program by itself, which usually requires additional system permissions. Qemu may use "in-qemu" RSS as a fallback option, which does not require system permissions but doesn't work with vhost TAP.
Qemu patches: https://lists.nongnu.org/archive/html/qemu-devel/2021-07/msg03535.html
Andrew Melnychenko (10):
  domain_conf: Added configs for RSS and Hash report.
  qemu_capabilities: Added capabilities for qemu's "rss" and "hash".
  qemu_command: Added "rss" and "hash" properties.
  virsocket: Added receive for multiple fds.
  qemu_capabilities: Added capability for qemu's "ebpf_rss_fds".
  qemu_capabilities: Added capability for ebpf helper path.
  qemu_interface: Added ebpf helper call.
  qemu_command: Added ebpf RSS helper call for NIC creation.
  qemu_hotplug: Added helper call for hotplug NIC.
  docs: Added descriptions for "rss" and "rss_hash_report" configurations.
docs/formatdomain.rst        | 16 +++++++
src/conf/domain_conf.c       | 31 +++++++++++++-
src/conf/domain_conf.h       |  2 +
src/libvirt_private.syms     |  1 +
src/qemu/qemu_capabilities.c | 48 +++++++++++++++++++++
src/qemu/qemu_capabilities.h |  5 +++
src/qemu/qemu_command.c      | 46 +++++++++++++++++++-
src/qemu/qemu_command.h      |  2 +
src/qemu/qemu_hotplug.c      | 30 ++++++++++++-
src/qemu/qemu_interface.c    | 54 +++++++++++++++++++++++
src/qemu/qemu_interface.h    |  2 +
src/qemu/qemu_monitor.c      |  9 ++++
src/qemu/qemu_monitor.h      |  3 ++
src/qemu/qemu_monitor_json.c | 50 ++++++++++++++++++++++
src/qemu/qemu_monitor_json.h |  3 ++
src/qemu/qemu_validate.c     | 16 +++++++
src/util/virsocket.c         | 83 ++++++++++++++++++++++++++++++++++++
src/util/virsocket.h         |  2 +
18 files changed, 399 insertions(+), 4 deletions(-)
-- 2.31.1

чт, 21 окт. 2021 г. в 01:28, Andrew Melnichenko <andrew@daynix.com>:
Hi, Yes, the work is in progress. I'm now working on a proper solution for the eBPF RSS helper.
Ok. Thank you!
On Wed, Oct 20, 2021 at 3:23 PM Nikolay Shirokovskiy <nshirokovskiy@virtuozzo.com> wrote:
Hi, Andrew.
We at Virtuozzo are interested in this functionality too. Do you plan to continue your work on it?
Nikolay
пн, 16 авг. 2021 г. в 15:00, Andrew Melnichenko <andrew@daynix.com>:
Ping
On Wed, Jul 28, 2021 at 11:17 AM Andrew Melnychenko <andrew@daynix.com> wrote:
This series of patches adds RSS property support for virtio-net-pci.
Virtio RSS works effectively with TAP devices; it requires additional vectors for VirtioNet, queues for the TAP device, and vCPU cores. Example of device configuration:

```
<interface type="network">
  <mac address="52:54:00:c4:90:25"/>
  <source network="default"/>
  <model type="virtio"/>
  <driver name="qemu" queues="9" rss="on" rss_hash_report="off"/>
  <address type="pci" domain="0x0000" bus="0x01" slot="0x00" function="0x0"/>
</interface>
```
The "rss" capability enables RSS, and "rss_hash_report" enables hashes in the vheader. Qemu uses an eBPF program as the RSS driver. A helper is used to load the RSS eBPF program. The path to the helper is provided by Qemu through the "query-helper-paths" QMP command. The helper "qemu-ebpf-rss-helper" is built with Qemu and may differ from build to build, so Qemu must provide the proper helper path. Libvirt calls the helper and receives the program and map fds through a unix socket. The fds are then passed to Qemu in the "ebpf_rss_fds" property, either by passing them to the child process or over a unix socket. If libvirt fails to call the helper, or Qemu did not provide the path, Qemu is launched without the "ebpf_rss_fds" property. Without the "ebpf_rss_fds" property, Qemu tries to load the eBPF program by itself, which usually requires additional system permissions. Qemu may use "in-qemu" RSS as a fallback option, which does not require system permissions but doesn't work with vhost TAP.
Qemu patches: https://lists.nongnu.org/archive/html/qemu-devel/2021-07/msg03535.html
Andrew Melnychenko (10):
  domain_conf: Added configs for RSS and Hash report.
  qemu_capabilities: Added capabilities for qemu's "rss" and "hash".
  qemu_command: Added "rss" and "hash" properties.
  virsocket: Added receive for multiple fds.
  qemu_capabilities: Added capability for qemu's "ebpf_rss_fds".
  qemu_capabilities: Added capability for ebpf helper path.
  qemu_interface: Added ebpf helper call.
  qemu_command: Added ebpf RSS helper call for NIC creation.
  qemu_hotplug: Added helper call for hotplug NIC.
  docs: Added descriptions for "rss" and "rss_hash_report" configurations.
docs/formatdomain.rst        | 16 +++++++
src/conf/domain_conf.c       | 31 +++++++++++++-
src/conf/domain_conf.h       |  2 +
src/libvirt_private.syms     |  1 +
src/qemu/qemu_capabilities.c | 48 +++++++++++++++++++++
src/qemu/qemu_capabilities.h |  5 +++
src/qemu/qemu_command.c      | 46 +++++++++++++++++++-
src/qemu/qemu_command.h      |  2 +
src/qemu/qemu_hotplug.c      | 30 ++++++++++++-
src/qemu/qemu_interface.c    | 54 +++++++++++++++++++++++
src/qemu/qemu_interface.h    |  2 +
src/qemu/qemu_monitor.c      |  9 ++++
src/qemu/qemu_monitor.h      |  3 ++
src/qemu/qemu_monitor_json.c | 50 ++++++++++++++++++++++
src/qemu/qemu_monitor_json.h |  3 ++
src/qemu/qemu_validate.c     | 16 +++++++
src/util/virsocket.c         | 83 ++++++++++++++++++++++++++++++++++++
src/util/virsocket.h         |  2 +
18 files changed, 399 insertions(+), 4 deletions(-)
-- 2.31.1
participants (4)
-
Andrew Melnichenko
-
Andrew Melnychenko
-
Michal Prívozník
-
Nikolay Shirokovskiy