Hi all,
On Fri, May 17, 2024 at 5:00 PM Michal Prívozník <mprivozn(a)redhat.com> wrote:
On 5/12/24 21:45, Andrew Melnychenko wrote:
> Added new capability ebpf_rss_fds for QEMU.
> Added logic for loading the "RSS" eBPF program.
> eBPF file descriptors passed to the QEMU.
>
> Signed-off-by: Andrew Melnychenko <andrew(a)daynix.com>
> ---
> src/qemu/qemu_capabilities.c | 4 +++
> src/qemu/qemu_capabilities.h | 3 ++
> src/qemu/qemu_command.c | 61 ++++++++++++++++++++++++++++++++++++
> src/qemu/qemu_domain.c | 4 +++
> src/qemu/qemu_domain.h | 3 ++
> 5 files changed, 75 insertions(+)
>
> diff --git a/src/qemu/qemu_capabilities.c b/src/qemu/qemu_capabilities.c
> index 09bb6ca36e..bb6a587725 100644
> --- a/src/qemu/qemu_capabilities.c
> +++ b/src/qemu/qemu_capabilities.c
> @@ -708,6 +708,9 @@ VIR_ENUM_IMPL(virQEMUCaps,
> "usb-mtp", /* QEMU_CAPS_DEVICE_USB_MTP */
> "machine.virt.ras", /* QEMU_CAPS_MACHINE_VIRT_RAS */
> "virtio-sound", /* QEMU_CAPS_DEVICE_VIRTIO_SOUND */
> +
> + /* 460 */
> + "virtio-net.ebpf_rss_fds", /*
QEMU_CAPS_VIRTIO_NET_EBPF_RSS_FDS */
> );
>
>
> @@ -1452,6 +1455,7 @@ static struct virQEMUCapsDevicePropsFlags
virQEMUCapsDevicePropsVirtioBlk[] = {
> static struct virQEMUCapsDevicePropsFlags virQEMUCapsDevicePropsVirtioNet[] = {
> { "acpi-index", QEMU_CAPS_ACPI_INDEX, NULL },
> { "rss", QEMU_CAPS_VIRTIO_NET_RSS, NULL },
> + { "ebpf-rss-fds", QEMU_CAPS_VIRTIO_NET_EBPF_RSS_FDS, NULL },
> };
>
> static struct virQEMUCapsDevicePropsFlags virQEMUCapsDevicePropsPCIeRootPort[] = {
> diff --git a/src/qemu/qemu_capabilities.h b/src/qemu/qemu_capabilities.h
> index 371ea19bd0..e4bb137b21 100644
> --- a/src/qemu/qemu_capabilities.h
> +++ b/src/qemu/qemu_capabilities.h
> @@ -688,6 +688,9 @@ typedef enum { /* virQEMUCapsFlags grouping marker for
syntax-check */
> QEMU_CAPS_MACHINE_VIRT_RAS, /* -machine virt,ras= */
> QEMU_CAPS_DEVICE_VIRTIO_SOUND, /* -device virtio-sound-* */
>
> + /* 460 */
> + QEMU_CAPS_VIRTIO_NET_EBPF_RSS_FDS, /* virtio-net ebpf_rss_fds feature */
> +
> QEMU_CAPS_LAST /* this must always be the last item */
> } virQEMUCapsFlags;
>
> diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c
> index 9859ea67a4..77715cf6fe 100644
> --- a/src/qemu/qemu_command.c
> +++ b/src/qemu/qemu_command.c
> @@ -3805,6 +3805,25 @@ qemuBuildLegacyNicStr(virDomainNetDef *net)
> }
>
>
> +static virJSONValue *
> +qemuBuildEbpfRssArg(virDomainNetDef *net)
> +{
> + qemuDomainNetworkPrivate *netpriv = QEMU_DOMAIN_NETWORK_PRIVATE(net);
> + g_autoptr(virJSONValue) props = NULL;
> +
> + GSList *n;
> +
> + if (netpriv->ebpfrssfds)
> + props = virJSONValueNewArray();
nitpick: A bit more convoluted than needed be.
if (!netpriv->ebpfrssfds)
return NULL;
props = virJSONValueNewArray();
I'll refactor it in the next patch.
> +
> + for (n = netpriv->ebpfrssfds; n; n = n->next) {
> + virJSONValueArrayAppendString(props, qemuFDPassDirectGetPath(n->data));
> + }
> +
> + return g_steal_pointer(&props);
> +}
> +
> +
> virJSONValue *
> qemuBuildNicDevProps(virDomainDef *def,
> virDomainNetDef *net,
> @@ -3813,6 +3832,7 @@ qemuBuildNicDevProps(virDomainDef *def,
> g_autoptr(virJSONValue) props = NULL;
> char macaddr[VIR_MAC_STRING_BUFLEN];
> g_autofree char *netdev = g_strdup_printf("host%s",
net->info.alias);
> + g_autoptr(virJSONValue) ebpffds = NULL;
We like to declare variables in their smallest possible scope. IOW, this
should be declared ..
>
> if (virDomainNetIsVirtioModel(net)) {
> const char *tx = NULL;
.. in this block.
Ok.
> @@ -3863,6 +3883,8 @@ qemuBuildNicDevProps(virDomainDef *def,
> if (!(props = qemuBuildVirtioDevProps(VIR_DOMAIN_DEVICE_NET, net,
qemuCaps)))
> return NULL;
>
> + ebpffds = qemuBuildEbpfRssArg(net);
> +
> if (virJSONValueObjectAdd(&props,
> "S:tx", tx,
> "T:ioeventfd",
net->driver.virtio.ioeventfd,
> @@ -3887,6 +3909,7 @@ qemuBuildNicDevProps(virDomainDef *def,
> "T:hash",
net->driver.virtio.rss_hash_report,
> "p:host_mtu", net->mtu,
> "T:failover", failover,
> + "A:ebpf-rss-fds", &ebpffds,
> NULL) < 0)
> return NULL;
> } else {
> @@ -4170,6 +4193,39 @@ qemuBuildWatchdogDevProps(const virDomainDef *def,
> }
>
>
> +static void
> +qemuOpenEbpfRssFds(virDomainNetDef *net, virQEMUCaps *qemuCaps)
> +{
> + const char *ebpfRSSObject = NULL;
> + int fds[16];
So there can be 16 FDs top? Isn't the actual value related to number of
virtio queues?
The 16 is just a "big enough number". Overall, I think it is possible to
dynamically figure out or allocate filedescriptors - I'll check it in
the next patches.
> + int nfds = 0;
> + size_t i = 0;
> + qemuDomainNetworkPrivate *netpriv = QEMU_DOMAIN_NETWORK_PRIVATE(net);
> +
> + netpriv->libbpfRSSObject = NULL;
> + netpriv->ebpfrssfds = NULL;
This looks suspicious. When allocating private data, g_new0() made sure
this is zeroed out.
> +
> + /* Add ebpf values */
> + if (net->driver.virtio.rss == VIR_TRISTATE_SWITCH_ON
> + && net->driver.virtio.rss_hash_report != VIR_TRISTATE_SWITCH_ON
> + && virQEMUCapsGet(qemuCaps, QEMU_CAPS_VIRTIO_NET_EBPF_RSS_FDS)) {
> + ebpfRSSObject = virQEMUCapsGetEbpf(qemuCaps, "rss");
> + nfds = qemuInterfaceLoadEbpf(ebpfRSSObject,
&netpriv->libbpfRSSObject, fds, 16);
s/G_N_ELEMENTS(fds)/16/
Also, at this point if nfds == -1 we're not sure whether we're lacking
libbpf, or libpf failed to load eBPF program. While the former should be
skipped over, the latter is a fatal error, isn't it?
I'll check it.
>
> > +
> > + if (nfds > 0) {
>
> To increase readability of this code (by decrasing level of nesting),
> I'd write this as:
>
> if (nfds <= 0)
> return;
>
> > + for (i = 0; i < nfds; ++i) {
> > + g_autofree char *name =
g_strdup_printf("ebpfrssfd-%s-%zu", net->info.alias, i);
> > +
> > + netpriv->ebpfrssfds =
> > + g_slist_prepend(netpriv->ebpfrssfds,
qemuFDPassDirectNew(name, fds + i));
> > + }
> > + }
> > +
> > + netpriv->ebpfrssfds = g_slist_reverse(netpriv->ebpfrssfds);
> > + }
> > +}
> > +
> > +
>
> Michal
>