[PATCH 0/4] Add news for recent features and CVEs
by Han Han
Han Han (4):
NEWS: qemu: Add support for hyperv enlightenments features
NEWS: cpu_map: Add the EPYC-Genoa cpu mode
NEWS: Add the news for CVE-2024-2494
NEWS: Add the news for CVE-2024-4418
NEWS.rst | 39 +++++++++++++++++++++++++++++++++++++++
1 file changed, 39 insertions(+)
--
2.47.0
4 months, 2 weeks
[PATCH] qemuDomainDiskChangeSupported: Add missing iothreads check
by Adam Julis
GSList of iothreads is not allowed to be changed while the
virtual machine is running.
Resolves: https://issues.redhat.com/browse/RHEL-23607
Signed-off-by: Adam Julis <ajulis(a)redhat.com>
---
While the qemuDomainDiskChangeSupported() design primarily uses
its macros (CHECK_EQ and CHECK_STREQ_NULLABLE), the logic for comparing 2
GSList of iothreads could perhaps be extracted into a separate function
(e.g. IothreadsGslistCompare(GSList *first, GSList *second)). I am
absolutely not sure about this idea so feel free to comment.
src/qemu/qemu_domain.c | 53 ++++++++++++++++++++++++++++++++++++++++++
1 file changed, 53 insertions(+)
diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c
index 298f4bfb9e..2b5222c685 100644
--- a/src/qemu/qemu_domain.c
+++ b/src/qemu/qemu_domain.c
@@ -8505,6 +8505,59 @@ qemuDomainDiskChangeSupported(virDomainDiskDef *disk,
CHECK_EQ(discard, "discard", true);
CHECK_EQ(iothread, "iothread", true);
+ /* compare list of iothreads, no change allowed */
+ if (orig_disk->iothreads != disk->iothreads) {
+ GSList *old;
+ GSList *new = disk->iothreads;
+ bool print_err = true;
+
+ for (old = orig_disk->iothreads; old; old = old->next) {
+ virDomainDiskIothreadDef *orig = old->data;
+ virDomainDiskIothreadDef *update;
+ print_err = false;
+
+ if (new == NULL) {
+ print_err = true;
+ break;
+ }
+
+ update = new->data;
+
+ if (orig->id != update->id) {
+ print_err = true;
+ break;
+ }
+
+ if (orig->nqueues != update->nqueues) {
+ print_err = true;
+ break;
+ }
+
+ if (orig->nqueues != 0) {
+ ssize_t i = 0;
+
+ while (i < orig->nqueues) {
+ if (orig->queues[i] != update->queues[i]) {
+ print_err = true;
+ break;
+ }
+ }
+ }
+
+ new = new->next;
+ if (new)
+ print_err = true;
+ }
+
+ if (print_err) {
+ virReportError(VIR_ERR_OPERATION_UNSUPPORTED,
+ _("cannot modify field '%1$s' (or it's parts) of the disk"),
+ "iothreads");
+ return false;
+ }
+ }
+
+
CHECK_STREQ_NULLABLE(domain_name,
"backenddomain");
--
2.45.2
4 months, 2 weeks
[PATCH v2] ch: Enable callbacks for ch domain events
by Praveen K Paladugu
From: Praveen K Paladugu <prapal(a)linux.microsoft.com>
Enable callbacks for define, undefine, started, booted, stopped,
destroyed events of ch guests.
Signed-off-by: Praveen K Paladugu <praveenkpaladugu(a)gmail.com>
---
src/ch/ch_conf.h | 4 +++
src/ch/ch_driver.c | 82 ++++++++++++++++++++++++++++++++++++++++++++--
2 files changed, 84 insertions(+), 2 deletions(-)
diff --git a/src/ch/ch_conf.h b/src/ch/ch_conf.h
index a77cad7a2a..97c6c24aa5 100644
--- a/src/ch/ch_conf.h
+++ b/src/ch/ch_conf.h
@@ -24,6 +24,7 @@
#include "virthread.h"
#include "ch_capabilities.h"
#include "virebtables.h"
+#include "object_event.h"
#define CH_DRIVER_NAME "CH"
#define CH_CMD "cloud-hypervisor"
@@ -75,6 +76,9 @@ struct _virCHDriver
* then lockless thereafter */
virCHDriverConfig *config;
+ /* Immutable pointer, self-locking APIs */
+ virObjectEventState *domainEventState;
+
/* pid file FD, ensures two copies of the driver can't use the same root */
int lockFD;
diff --git a/src/ch/ch_driver.c b/src/ch/ch_driver.c
index dab025edc1..d18f266387 100644
--- a/src/ch/ch_driver.c
+++ b/src/ch/ch_driver.c
@@ -28,6 +28,7 @@
#include "ch_monitor.h"
#include "ch_process.h"
#include "domain_cgroup.h"
+#include "domain_event.h"
#include "datatypes.h"
#include "driver.h"
#include "viraccessapicheck.h"
@@ -263,6 +264,7 @@ chDomainCreateWithFlags(virDomainPtr dom, unsigned int flags)
virCHDriver *driver = dom->conn->privateData;
virDomainObj *vm;
virCHDomainObjPrivate *priv;
+ virObjectEvent *event;
g_autofree char *managed_save_path = NULL;
int ret = -1;
@@ -304,6 +306,14 @@ chDomainCreateWithFlags(virDomainPtr dom, unsigned int flags)
ret = virCHProcessStart(driver, vm, VIR_DOMAIN_RUNNING_BOOTED);
}
+ if (ret == 0) {
+ event = virDomainEventLifecycleNewFromObj(vm,
+ VIR_DOMAIN_EVENT_STARTED,
+ VIR_DOMAIN_EVENT_STARTED_BOOTED);
+ if (event)
+ virObjectEventStateQueue(driver->domainEventState, event);
+ }
+
endjob:
virDomainObjEndJob(vm);
@@ -323,8 +333,10 @@ chDomainDefineXMLFlags(virConnectPtr conn, const char *xml, unsigned int flags)
{
virCHDriver *driver = conn->privateData;
g_autoptr(virDomainDef) vmdef = NULL;
+ g_autoptr(virDomainDef) oldDef = NULL;
virDomainObj *vm = NULL;
virDomainPtr dom = NULL;
+ virObjectEvent *event = NULL;
g_autofree char *managed_save_path = NULL;
unsigned int parse_flags = VIR_DOMAIN_DEF_PARSE_INACTIVE;
@@ -345,7 +357,7 @@ chDomainDefineXMLFlags(virConnectPtr conn, const char *xml, unsigned int flags)
if (!(vm = virDomainObjListAdd(driver->domains, &vmdef,
driver->xmlopt,
- 0, NULL)))
+ 0, &oldDef)))
goto cleanup;
/* cleanup if there's any stale managedsave dir */
@@ -358,11 +370,17 @@ chDomainDefineXMLFlags(virConnectPtr conn, const char *xml, unsigned int flags)
}
vm->persistent = 1;
-
+ event = virDomainEventLifecycleNewFromObj(vm,
+ VIR_DOMAIN_EVENT_DEFINED,
+ !oldDef ?
+ VIR_DOMAIN_EVENT_DEFINED_ADDED :
+ VIR_DOMAIN_EVENT_DEFINED_UPDATED);
dom = virGetDomain(conn, vm->def->name, vm->def->uuid, vm->def->id);
cleanup:
virDomainObjEndAPI(&vm);
+ virObjectEventStateQueue(driver->domainEventState, event);
+
return dom;
}
@@ -378,6 +396,7 @@ chDomainUndefineFlags(virDomainPtr dom,
{
virCHDriver *driver = dom->conn->privateData;
virDomainObj *vm;
+ virObjectEvent *event = NULL;
int ret = -1;
virCheckFlags(0, -1);
@@ -393,6 +412,9 @@ chDomainUndefineFlags(virDomainPtr dom,
"%s", _("Cannot undefine transient domain"));
goto cleanup;
}
+ event = virDomainEventLifecycleNewFromObj(vm,
+ VIR_DOMAIN_EVENT_UNDEFINED,
+ VIR_DOMAIN_EVENT_UNDEFINED_REMOVED);
vm->persistent = 0;
if (!virDomainObjIsActive(vm)) {
@@ -403,6 +425,8 @@ chDomainUndefineFlags(virDomainPtr dom,
cleanup:
virDomainObjEndAPI(&vm);
+ virObjectEventStateQueue(driver->domainEventState, event);
+
return ret;
}
@@ -643,6 +667,7 @@ chDomainDestroyFlags(virDomainPtr dom, unsigned int flags)
{
virCHDriver *driver = dom->conn->privateData;
virDomainObj *vm;
+ virObjectEvent *event = NULL;
int ret = -1;
virCheckFlags(0, -1);
@@ -662,6 +687,9 @@ chDomainDestroyFlags(virDomainPtr dom, unsigned int flags)
if (virCHProcessStop(driver, vm, VIR_DOMAIN_SHUTOFF_DESTROYED) < 0)
goto endjob;
+ event = virDomainEventLifecycleNewFromObj(vm,
+ VIR_DOMAIN_EVENT_STOPPED,
+ VIR_DOMAIN_EVENT_STOPPED_DESTROYED);
virCHDomainRemoveInactive(driver, vm);
ret = 0;
@@ -670,6 +698,8 @@ chDomainDestroyFlags(virDomainPtr dom, unsigned int flags)
cleanup:
virDomainObjEndAPI(&vm);
+ virObjectEventStateQueue(driver->domainEventState, event);
+
return ret;
}
@@ -1365,6 +1395,7 @@ static int chStateCleanup(void)
virObjectUnref(ch_driver->xmlopt);
virObjectUnref(ch_driver->caps);
virObjectUnref(ch_driver->domains);
+ virObjectUnref(ch_driver->domainEventState);
virMutexDestroy(&ch_driver->lock);
g_clear_pointer(&ch_driver, g_free);
@@ -1414,6 +1445,9 @@ chStateInitialize(bool privileged,
if (!(ch_driver->config = virCHDriverConfigNew(privileged)))
goto cleanup;
+ if (!(ch_driver->domainEventState = virObjectEventStateNew()))
+ goto cleanup;
+
if ((rv = chExtractVersion(ch_driver)) < 0) {
if (rv == -2)
ret = VIR_DRV_STATE_INIT_SKIPPED;
@@ -2205,6 +2239,48 @@ chDomainSetNumaParameters(virDomainPtr dom,
return ret;
}
+static int
+chConnectDomainEventRegisterAny(virConnectPtr conn,
+ virDomainPtr dom,
+ int eventID,
+ virConnectDomainEventGenericCallback callback,
+ void *opaque,
+ virFreeCallback freecb)
+{
+ virCHDriver *driver = conn->privateData;
+ int ret = -1;
+
+ if (virConnectDomainEventRegisterAnyEnsureACL(conn) < 0)
+ return -1;
+
+ if (virDomainEventStateRegisterID(conn,
+ driver->domainEventState,
+ dom, eventID,
+ callback, opaque, freecb, &ret) < 0)
+ ret = -1;
+
+ return ret;
+}
+
+
+static int
+chConnectDomainEventDeregisterAny(virConnectPtr conn,
+ int callbackID)
+{
+ virCHDriver *driver = conn->privateData;
+
+ if (virConnectDomainEventDeregisterAnyEnsureACL(conn) < 0)
+ return -1;
+
+ if (virObjectEventStateDeregisterID(conn,
+ driver->domainEventState,
+ callbackID, true) < 0)
+ return -1;
+
+ return 0;
+}
+
+
/* Function Tables */
static virHypervisorDriver chHypervisorDriver = {
.name = "CH",
@@ -2262,6 +2338,8 @@ static virHypervisorDriver chHypervisorDriver = {
.domainHasManagedSaveImage = chDomainHasManagedSaveImage, /* 10.2.0 */
.domainRestore = chDomainRestore, /* 10.2.0 */
.domainRestoreFlags = chDomainRestoreFlags, /* 10.2.0 */
+ .connectDomainEventRegisterAny = chConnectDomainEventRegisterAny, /* 10.8.0 */
+ .connectDomainEventDeregisterAny = chConnectDomainEventDeregisterAny, /* 10.8.0 */
};
static virConnectDriver chConnectDriver = {
--
2.44.0
4 months, 2 weeks
[PATCH 00/10] PCI passthrough support for ch guests
by Praveen K Paladugu
This patch series introduces PCI passthrough support for ch guests. While
enabling this feature I refactored a bunch of methods from qemu to hypervisor
to reduce duplication of logic between the drivers.
Praveen K Paladugu (7):
hypervisor: move HostdevNeedsVFIO to hypervisor
hypervisor: move HostdevHostSupportsPassthroughVFIO
qemu: replace qemuHostdevPreparePCIDevices
ch: prepare domain definition for pci passthrough
ch: allow hostdev in domain definitions
ch: reattach PCI devices to host while stopping guest
ch: explicitly set INFILESIZE to 0
Wei Liu (3):
ch: add host device manager to driver
ch: add scaffolding for host devices management
ch: prepare host for PCI passthrough
po/POTFILES | 1 +
src/ch/ch_conf.h | 4 ++
src/ch/ch_domain.c | 2 +-
src/ch/ch_driver.c | 4 ++
src/ch/ch_hostdev.c | 115 +++++++++++++++++++++++++++++++++++
src/ch/ch_hostdev.h | 32 ++++++++++
src/ch/ch_monitor.c | 1 +
src/ch/ch_process.c | 74 +++++++++++++++++++++-
src/ch/meson.build | 2 +
src/hypervisor/virhostdev.c | 23 +++++++
src/hypervisor/virhostdev.h | 5 ++
src/libvirt_private.syms | 2 +
src/qemu/qemu_capabilities.c | 2 +-
src/qemu/qemu_cgroup.c | 5 +-
src/qemu/qemu_domain.c | 2 +-
src/qemu/qemu_driver.c | 2 +-
src/qemu/qemu_hostdev.c | 40 +-----------
src/qemu/qemu_hostdev.h | 10 ---
src/qemu/qemu_hotplug.c | 5 +-
src/qemu/qemu_namespace.c | 2 +-
tests/domaincapstest.c | 2 +-
21 files changed, 276 insertions(+), 59 deletions(-)
create mode 100644 src/ch/ch_hostdev.c
create mode 100644 src/ch/ch_hostdev.h
--
2.44.0
4 months, 2 weeks
[PATCH] virnetdevopenvswitch: Warn on unsupported QoS settings
by Michal Privoznik
Let me preface this with stating the obvious: documentation on
QoS in OVS is very sparse. This is all based on my observation
and OVS codebase analysis.
For the following QoS setting:
<bandwidth>
<inbound average="512" peak="1024" burst="32"/>
</bandwidth>
the following QoS setting is generated into OVS (NB, our XML
values are in KiB/s, OVS has them in bits/s):
# ovs-vsctl list qos
_uuid : a087226b-2da6-4575-ad4c-bf570cb812a9
external_ids : {ifname=vnet1, vm-id="7714e6b5-4885-4140-bc59-2f77cc99b3b5"}
other_config : {burst="262144", max-rate="8192000", min-rate="4096000"}
queues : {0=655bf3a7-e530-4516-9caf-ec9555dfbd4c}
type : linux-htb
from which the following topology is generated:
# for i in qdisc class; do tc -s -d -g $i show dev vnet1; done
qdisc htb 1: root refcnt 2 r2q 10 default 0x1 direct_packets_stat 0 ver 3.17 direct_qlen 1000
Sent 2186 bytes 16 pkt (dropped 0, overlimits 0 requeues 0)
backlog 0b 0p requeues 0
+---(1:fffe) htb rate 8192Kbit ceil 8192Kbit linklayer ethernet burst 1499b/1mpu 60b cburst 1499b/1mpu 60b level 7
| Sent 2186 bytes 16 pkt (dropped 0, overlimits 0 requeues 0)
| backlog 0b 0p requeues 0
|
+---(1:1) htb prio 0 quantum 51200 rate 4096Kbit ceil 8192Kbit linklayer ethernet burst 32Kb/1mpu 60b cburst 32Kb/1mpu 60b level 0
Sent 2186 bytes 16 pkt (dropped 0, overlimits 0 requeues 0)
backlog 0b 0p requeues 0
Long story short, the default class (1:) for an OVS interface has
average and peak set exactly as requested. But since it's nested
under another class (1:fffe), it can borrow unused bandwidth. And
the parent is set to have rate = ceil = peak from our XML. From
[1]: htb_tc_install() calls htb_parse_qdisc_details__() which
sets: 'hc->min_rate = hc->max_rate;' and then calls
htb_setup_class_(..., tc_make_handle(1, 0xfffe), tc_make_handle(1, 0), &hc);
to set up the top parent class.
In other words - the interface is set up to so that it can always
consume 'peak' bandwidth and there is no way for us to set it up
differently. It's too late to deny setting 'peak' different to
'average' at XML validation phase so do the next best thing -
throw a warning, just like we do in case <bandwidth/> is set for
an unsupported <interface/> type.
1: https://github.com/openvswitch/ovs/blob/main/lib/netdev-linux.c#L5039
Resolves: https://issues.redhat.com/browse/RHEL-53963
Signed-off-by: Michal Privoznik <mprivozn(a)redhat.com>
---
src/util/virnetdevopenvswitch.c | 8 ++++++++
1 file changed, 8 insertions(+)
diff --git a/src/util/virnetdevopenvswitch.c b/src/util/virnetdevopenvswitch.c
index e23f4c83b6..598cfa0031 100644
--- a/src/util/virnetdevopenvswitch.c
+++ b/src/util/virnetdevopenvswitch.c
@@ -945,6 +945,10 @@ virNetDevOpenvswitchInterfaceSetQos(const char *ifname,
}
if (tx && tx->average) {
+ if (tx->peak && tx->peak != tx->average) {
+ VIR_WARN("Setting different 'peak' value than 'average' for QoS for OVS interface %s is unsupported",
+ ifname);
+ }
if (virNetDevOpenvswitchInterfaceSetTxQos(ifname, tx, vmuuid) < 0)
return -1;
} else {
@@ -954,6 +958,10 @@ virNetDevOpenvswitchInterfaceSetQos(const char *ifname,
}
if (rx) {
+ if (rx->peak && tx->peak != rx->average) {
+ VIR_WARN("Setting different 'peak' value than 'average' for QoS for OVS interface %s is unsupported",
+ ifname);
+ }
if (virNetDevOpenvswitchInterfaceSetRxQos(ifname, rx) < 0)
return -1;
} else {
--
2.45.2
4 months, 3 weeks
[PATCH v3 0/7] introduce job-change qmp command
by Vladimir Sementsov-Ogievskiy
Hi all!
Here is new job-change command - a replacement for (becoming deprecated)
block-job-change, as a first step of my "[RFC 00/15] block job API"
v3:
01: add a-b by Markus
03: add a-b by Markus, s/9.1/9.2/ in QAPI
05: update commit message, s/9.1/9.2/ in QAPI
06: update commit message (and subject!), s/9.1/9.2/ in deprecated.rst
Vladimir Sementsov-Ogievskiy (7):
qapi: rename BlockJobChangeOptions to JobChangeOptions
blockjob: block_job_change_locked(): check job type
qapi: block-job-change: make copy-mode parameter optional
blockjob: move change action implementation to job from block-job
qapi: add job-change
qapi/block-core: deprecate block-job-change
iotests/mirror-change-copy-mode: switch to job-change command
block/mirror.c | 13 +++++---
blockdev.c | 4 +--
blockjob.c | 20 ------------
docs/about/deprecated.rst | 5 +++
include/block/blockjob.h | 11 -------
include/block/blockjob_int.h | 7 -----
include/qemu/job.h | 12 +++++++
job-qmp.c | 15 +++++++++
job.c | 23 ++++++++++++++
qapi/block-core.json | 31 ++++++++++++++-----
.../tests/mirror-change-copy-mode | 2 +-
11 files changed, 90 insertions(+), 53 deletions(-)
--
2.34.1
4 months, 3 weeks
[PATCH v5 0/6] Add TPM emulator <source type='file/dir' path='..'/>
by marcandre.lureau@redhat.com
From: Marc-André Lureau <marcandre.lureau(a)redhat.com>
Hi,
When swtpm capabilities reports "nvram-backend-dir", it can accepts a single
file or block device where TPM state will be stored.
--tpmstate must be backend-uri=file://.
v5:
- fix indentation
- update doc about state sharing/locking
- add r-b from Stefan
v4:
- add "qemu: explicit swtpm state locking"
- add r-b from Stefan, first patch only atm
v3:
- changed to <source type='file/dir' path='..'/>
v2:
- add <source dir='..'/> support as well (Daniel)
Related: https://issues.redhat.com/browse/CNV-35250
Marc-André Lureau (6):
util: check swtpm nvram-backend-{dir,file} capabilities
tpm: rename 'storagepath' to 'source_path'
schema: add TPM emulator <source type='file' path='..'>
schema: add TPM emulator <source type='dir' path='..'>
qemu_tpm: handle file/block storage source
qemu: explicit swtpm state locking
docs/formatdomain.rst | 22 ++++
src/conf/domain_conf.c | 31 ++++-
src/conf/domain_conf.h | 12 +-
src/conf/schemas/domaincommon.rng | 26 ++++
src/qemu/qemu_tpm.c | 114 +++++++++++++-----
src/security/security_selinux.c | 6 +-
src/util/virtpm.c | 3 +
src/util/virtpm.h | 3 +
.../qemuxmlconfdata/tpm-emulator-tpm2-enc.xml | 1 +
tests/qemuxmlconfdata/tpm-emulator-tpm2.xml | 1 +
tests/testutilsqemu.c | 1 +
11 files changed, 187 insertions(+), 33 deletions(-)
--
2.47.0
4 months, 4 weeks
[PATCH v2] network: add rule to nftables backend that zeroes checksum of DHCP responses
by Laine Stump
Many long years ago (April 2010), soon after "vhost" in-kernel packet
processing was added to the virtio-net driver, people running RHEL5
virtual machines with a virtio-net interface connected via a libvirt
virtual network noticed that when vhost packet processing was enabled,
their VMs could no longer get an IP address via DHCP - the guest was
ignoring the DHCP response packets sent by the host.
(I've been informed by danpb that the same issue had been encountered,
and "fixed" even earlier than that, in 2006, with Xen as the
hypervisor.)
The "gory details" of the 2010 discussion are chronicled here:
https://lists.isc.org/pipermail/dhcp-hackers/2010-April/001835.html
but basically it was because the checksum of packets wasn't being
fully computed on the host side (because QEMU on the host and the NIC
driver in the guest had agreed between themselves to turn off
checksums because they were unnecessary due to the "link" between the
two being entirely in local memory and not a physical cable), but
1) a partial checksum had been put into the packets at some point by
"someone"
2) the "don't use checksums" info was known by the guest kernel, which
would properly ignore the "bad" checksum), and
3) the packets were being read by the dhclient application on the
guest side with a "raw" socket (thus bypassing the guest kernel UDP
processing that would have known the checksum was irrelevant)),
The "fix" for this ended up being two-tiered:
1) The ISC DHCP package (which contains the aforementioned dhclient
program) made a fix to their dhclient code which caused it to accept
packets anyway even if they didn't have a proper checksum (NB: that's
not a full explanation, and possibly not accurate). This fixed the
problem for guests with an updated dhclient. Here is the code with the
fix to ISC DHCP:
https://github.com/isc-projects/dhcp/blob/master/common/packet.c#L365
This fixed the issue for any new/updated guests that had the fixed
dhclient, but it didn't solve the problem for existing/old guest
images that didn't/couldn't get their dhclient updated. This brings us
to:
2) iptables added a new "CHECKSUM" target and "--checksum-fill"
action:
http://patchwork.ozlabs.org/patch/58525/
and libvirt added an iptables rule for each virtual network to match
DHCP response packets and perform --checksum-fill. This way by the
time dhclient on the guest reads the raw packet, the checksum would be
corrected, and the packet would be accepted. This was pushed upstream
in libvirt commit v0.8.2-142-gfd5b15ff1a.
The word at the time from those more knowledgeable than me was that
the bad checksum problem was really specific to ISC's dhclient running
on Linux, and so once their fix was in use everywhere dhclient was
used, bad checksums would be a thing of the past and the
--checksum-fill iptables rules would no longer be needed (but would
otherwise be harmless if they were still there). (Plot twist: the
dhclient code in fix (1) above apparently is on a Linux-only code path
- this is very important later!)
Based on this information (and also due to the opinion that fixing it
by having iptables modify the packet checksum was really the wrong way
to permanently fix things, i.e. an "ugly hack"), the nftables
developers made the decision to not implement an equivalent to
--checksum-fill in nftables. As a result, when I wrote the nftables
firewall backend for libvirt virtual networks earlier this year, it
didn't add in any rule to "fix" broken UDP checksums (since there was
apparently no equivalent in nftables and, after all, that was fixed
somewhere else 14 years ago, right???)
But last week, when Rich Jones was doing routine testing using a Fedora
40 host (the first Fedora release to use the nftables backend of libvirt's
network driver by default) and a FreeBSD guest, for "some strange
reason", the FreeBSD guest was unable to get an IP address from DHCP!!
https://www.spinics.net/linux/fedora/libvirt-users/msg14356.html
A few quick tests proved that it was the same old "bad checksum"
problem from 2010 come back to haunt us - it wasn't a Linux-only issue
after all.
Phil Sutter and Eric Garver (nftables people) pointed out that, while
nftables doesn't have an action that will *compute* the checksum of a
packet, it *does* have an action that will set the checksum to 0, and
suggested we try adding a "zero the checksum" rule for dhcp response
packets to our nftables ruleset. (Why? Because a checksum value of 0
in a IPv4 UDP packet is defined by RFC768 to mean "no checksum
generated", implying "checksum not needed"). It turns out that this
works - dhclient properly recognizes that a 0 checksum means "don't
bother with the checksum", and accepts the packet as valid.
So to once again fix this timeless bug, this patch adds such a
checksum zeroing rule to the nftables rules setup for each virtual
network.
This has been verified (on a Fedora 40 host) to fix DHCP with FreeBSD
guests, while not breaking it for Fedora or Windows (10) guests.
Fixes: b89c4991daa0ee9371f10937fab3b03c5ffdabc6
Reported-by: Rich Jones <rjones(a)redhat.com>
Fix-Suggested-by: Eric Garver <egarver(a)redhat.com>
Fix-Suggested-by: Phil Sutter <psutter(a)redhat.com>
Signed-off-by: Laine Stump <laine(a)redhat.com>
Reviewed-by: Daniel P. Berrangé <berrange(a)redhat.com>
---
(NB: jdenemar reminded me this is a bugfix, so we don't need to rush
to get it pushed before he freezes for the RC1 snapshot. I'm happy
with it as is though, if anyone is awake early enough and wants to
push it before he snapshots RC1. Otherwise I'll just push it later and
it will be in RC2)
Changes from V1:
* informational errors/omissions in commit log message fixed
src/network/network_nftables.c | 69 +++++++++++++++++++
tests/networkxml2firewalldata/base.nftables | 14 ++++
.../forward-dev-linux.nftables | 16 +++++
.../isolated-linux.nftables | 16 +++++
.../nat-default-linux.nftables | 16 +++++
.../nat-ipv6-linux.nftables | 16 +++++
.../nat-ipv6-masquerade-linux.nftables | 16 +++++
.../nat-many-ips-linux.nftables | 16 +++++
.../nat-port-range-ipv6-linux.nftables | 16 +++++
.../nat-port-range-linux.nftables | 16 +++++
.../nat-tftp-linux.nftables | 16 +++++
.../route-default-linux.nftables | 16 +++++
12 files changed, 243 insertions(+)
diff --git a/src/network/network_nftables.c b/src/network/network_nftables.c
index f8b5ab665d..5523207269 100644
--- a/src/network/network_nftables.c
+++ b/src/network/network_nftables.c
@@ -51,6 +51,7 @@ VIR_LOG_INIT("network.nftables");
#define VIR_NFTABLES_FWD_OUT_CHAIN "guest_output"
#define VIR_NFTABLES_FWD_X_CHAIN "guest_cross"
#define VIR_NFTABLES_NAT_POSTROUTE_CHAIN "guest_nat"
+#define VIR_NFTABLES_MANGLE_POSTROUTE_CHAIN "postroute_mangle"
/* we must avoid using the standard "filter" table as used by
* iptables, as any subsequent attempts to use iptables commands will
@@ -106,6 +107,10 @@ nftablesGlobalChain nftablesChains[] = {
/* chains for NAT rules */
{NULL, VIR_NFTABLES_NAT_POSTROUTE_CHAIN, "{ type nat hook postrouting priority 100; policy accept; }"},
+
+ /* chain for "mangle" rules that modify packets (e.g. 0 out UDP checksums) */
+ {NULL, VIR_NFTABLES_MANGLE_POSTROUTE_CHAIN, "{ type filter hook postrouting priority 0; policy accept; }"},
+
};
@@ -644,6 +649,44 @@ nftablesAddDontMasquerade(virFirewall *fw,
}
+/**
+ * nftablesAddOutputFixUdpChecksum:
+ *
+ * Add a rule to @fw that will 0 out the checksum of udp packets
+ * output from @iface with destination port @port.
+
+ * Zeroing the checksum of a UDP packet tells the receiving end "you
+ * don't need to validate the checksum", which is useful in cases
+ * where the host (sender) thinks that packet checksums will be
+ * computed elsewhere (and so leaves a partially computed checksum in
+ * the packet header) while the guest (receiver) thinks that the
+ * checksum has already been fully computed; in the meantime none of
+ * the code in between has actually finished computing the
+ * checksum.
+ *
+ * An example of this is DHCP response packets from host to
+ * guest. If the checksum of each of these packets isn't zeroed, then
+ * many guests (e.g. FreeBSD) will drop them with reason BAD CHECKSUM;
+ * if the packets arrive at those guests with a checksum of 0, they
+ * will happily accept the packet.
+ */
+static void
+nftablesAddOutputFixUdpChecksum(virFirewall *fw,
+ const char *iface,
+ int port)
+{
+ g_autofree char *portstr = g_strdup_printf("%d", port);
+
+ virFirewallAddCmd(fw, VIR_FIREWALL_LAYER_IPV4,
+ "insert", "rule", "ip",
+ VIR_NFTABLES_PRIVATE_TABLE,
+ VIR_NFTABLES_MANGLE_POSTROUTE_CHAIN,
+ "oif", iface, "udp", "dport", portstr,
+ "counter", "udp", "checksum", "set", "0",
+ NULL);
+}
+
+
static const char networkLocalMulticastIPv4[] = "224.0.0.0/24";
static const char networkLocalMulticastIPv6[] = "ff02::/16";
static const char networkLocalBroadcast[] = "255.255.255.255/32";
@@ -901,6 +944,30 @@ nftablesAddGeneralFirewallRules(virFirewall *fw,
}
+static void
+nftablesAddChecksumFirewallRules(virFirewall *fw,
+ virNetworkDef *def)
+{
+ size_t i;
+ virNetworkIPDef *ipv4def;
+
+ /* Look for the first IPv4 address that has dhcp or tftpboot
+ * defined. We support dhcp config on 1 IPv4 interface only.
+ */
+ for (i = 0; (ipv4def = virNetworkDefGetIPByIndex(def, AF_INET, i)); i++) {
+ if (ipv4def->nranges || ipv4def->nhosts)
+ break;
+ }
+
+ /* If we are doing local DHCP service on this network, add a rule
+ * that will fixup the checksum of DHCP response packets back to
+ * the guests.
+ */
+ if (ipv4def)
+ nftablesAddOutputFixUdpChecksum(fw, def->bridge, 68);
+}
+
+
static int
nftablesAddIPSpecificFirewallRules(virFirewall *fw,
virNetworkDef *def,
@@ -952,6 +1019,8 @@ nftablesAddFirewallRules(virNetworkDef *def, virFirewall **fwRemoval)
return -1;
}
+ nftablesAddChecksumFirewallRules(fw, def);
+
if (virFirewallApply(fw) < 0)
return -1;
diff --git a/tests/networkxml2firewalldata/base.nftables b/tests/networkxml2firewalldata/base.nftables
index a064318739..6371fc12dd 100644
--- a/tests/networkxml2firewalldata/base.nftables
+++ b/tests/networkxml2firewalldata/base.nftables
@@ -68,6 +68,13 @@ libvirt_network \
guest_nat \
'{ type nat hook postrouting priority 100; policy accept; }'
nft \
+add \
+chain \
+ip \
+libvirt_network \
+postroute_mangle \
+'{ type filter hook postrouting priority 0; policy accept; }'
+nft \
list \
table \
ip6 \
@@ -136,3 +143,10 @@ ip6 \
libvirt_network \
guest_nat \
'{ type nat hook postrouting priority 100; policy accept; }'
+nft \
+add \
+chain \
+ip6 \
+libvirt_network \
+postroute_mangle \
+'{ type filter hook postrouting priority 0; policy accept; }'
diff --git a/tests/networkxml2firewalldata/forward-dev-linux.nftables b/tests/networkxml2firewalldata/forward-dev-linux.nftables
index 8badb74beb..9dea1a88a4 100644
--- a/tests/networkxml2firewalldata/forward-dev-linux.nftables
+++ b/tests/networkxml2firewalldata/forward-dev-linux.nftables
@@ -156,3 +156,19 @@ daddr \
224.0.0.0/24 \
counter \
return
+nft \
+-ae insert \
+rule \
+ip \
+libvirt_network \
+postroute_mangle \
+oif \
+virbr0 \
+udp \
+dport \
+68 \
+counter \
+udp \
+checksum \
+set \
+0
diff --git a/tests/networkxml2firewalldata/isolated-linux.nftables b/tests/networkxml2firewalldata/isolated-linux.nftables
index d1b4dac178..67ee0a2bf5 100644
--- a/tests/networkxml2firewalldata/isolated-linux.nftables
+++ b/tests/networkxml2firewalldata/isolated-linux.nftables
@@ -62,3 +62,19 @@ oif \
virbr0 \
counter \
accept
+nft \
+-ae insert \
+rule \
+ip \
+libvirt_network \
+postroute_mangle \
+oif \
+virbr0 \
+udp \
+dport \
+68 \
+counter \
+udp \
+checksum \
+set \
+0
diff --git a/tests/networkxml2firewalldata/nat-default-linux.nftables b/tests/networkxml2firewalldata/nat-default-linux.nftables
index 28508292f9..951a5a6d60 100644
--- a/tests/networkxml2firewalldata/nat-default-linux.nftables
+++ b/tests/networkxml2firewalldata/nat-default-linux.nftables
@@ -142,3 +142,19 @@ daddr \
224.0.0.0/24 \
counter \
return
+nft \
+-ae insert \
+rule \
+ip \
+libvirt_network \
+postroute_mangle \
+oif \
+virbr0 \
+udp \
+dport \
+68 \
+counter \
+udp \
+checksum \
+set \
+0
diff --git a/tests/networkxml2firewalldata/nat-ipv6-linux.nftables b/tests/networkxml2firewalldata/nat-ipv6-linux.nftables
index d8a9ba706d..617ed8b753 100644
--- a/tests/networkxml2firewalldata/nat-ipv6-linux.nftables
+++ b/tests/networkxml2firewalldata/nat-ipv6-linux.nftables
@@ -200,3 +200,19 @@ oif \
virbr0 \
counter \
accept
+nft \
+-ae insert \
+rule \
+ip \
+libvirt_network \
+postroute_mangle \
+oif \
+virbr0 \
+udp \
+dport \
+68 \
+counter \
+udp \
+checksum \
+set \
+0
diff --git a/tests/networkxml2firewalldata/nat-ipv6-masquerade-linux.nftables b/tests/networkxml2firewalldata/nat-ipv6-masquerade-linux.nftables
index a7f09cda59..a710d0e296 100644
--- a/tests/networkxml2firewalldata/nat-ipv6-masquerade-linux.nftables
+++ b/tests/networkxml2firewalldata/nat-ipv6-masquerade-linux.nftables
@@ -272,3 +272,19 @@ daddr \
ff02::/16 \
counter \
return
+nft \
+-ae insert \
+rule \
+ip \
+libvirt_network \
+postroute_mangle \
+oif \
+virbr0 \
+udp \
+dport \
+68 \
+counter \
+udp \
+checksum \
+set \
+0
diff --git a/tests/networkxml2firewalldata/nat-many-ips-linux.nftables b/tests/networkxml2firewalldata/nat-many-ips-linux.nftables
index b826fe6134..0be5fb7e65 100644
--- a/tests/networkxml2firewalldata/nat-many-ips-linux.nftables
+++ b/tests/networkxml2firewalldata/nat-many-ips-linux.nftables
@@ -366,3 +366,19 @@ daddr \
224.0.0.0/24 \
counter \
return
+nft \
+-ae insert \
+rule \
+ip \
+libvirt_network \
+postroute_mangle \
+oif \
+virbr0 \
+udp \
+dport \
+68 \
+counter \
+udp \
+checksum \
+set \
+0
diff --git a/tests/networkxml2firewalldata/nat-port-range-ipv6-linux.nftables b/tests/networkxml2firewalldata/nat-port-range-ipv6-linux.nftables
index ceaed6fa40..7574356855 100644
--- a/tests/networkxml2firewalldata/nat-port-range-ipv6-linux.nftables
+++ b/tests/networkxml2firewalldata/nat-port-range-ipv6-linux.nftables
@@ -384,3 +384,19 @@ daddr \
ff02::/16 \
counter \
return
+nft \
+-ae insert \
+rule \
+ip \
+libvirt_network \
+postroute_mangle \
+oif \
+virbr0 \
+udp \
+dport \
+68 \
+counter \
+udp \
+checksum \
+set \
+0
diff --git a/tests/networkxml2firewalldata/nat-port-range-linux.nftables b/tests/networkxml2firewalldata/nat-port-range-linux.nftables
index 1dc37a26ec..127536e4db 100644
--- a/tests/networkxml2firewalldata/nat-port-range-linux.nftables
+++ b/tests/networkxml2firewalldata/nat-port-range-linux.nftables
@@ -312,3 +312,19 @@ oif \
virbr0 \
counter \
accept
+nft \
+-ae insert \
+rule \
+ip \
+libvirt_network \
+postroute_mangle \
+oif \
+virbr0 \
+udp \
+dport \
+68 \
+counter \
+udp \
+checksum \
+set \
+0
diff --git a/tests/networkxml2firewalldata/nat-tftp-linux.nftables b/tests/networkxml2firewalldata/nat-tftp-linux.nftables
index 28508292f9..951a5a6d60 100644
--- a/tests/networkxml2firewalldata/nat-tftp-linux.nftables
+++ b/tests/networkxml2firewalldata/nat-tftp-linux.nftables
@@ -142,3 +142,19 @@ daddr \
224.0.0.0/24 \
counter \
return
+nft \
+-ae insert \
+rule \
+ip \
+libvirt_network \
+postroute_mangle \
+oif \
+virbr0 \
+udp \
+dport \
+68 \
+counter \
+udp \
+checksum \
+set \
+0
diff --git a/tests/networkxml2firewalldata/route-default-linux.nftables b/tests/networkxml2firewalldata/route-default-linux.nftables
index 282c9542a5..be9c4f5439 100644
--- a/tests/networkxml2firewalldata/route-default-linux.nftables
+++ b/tests/networkxml2firewalldata/route-default-linux.nftables
@@ -56,3 +56,19 @@ oif \
virbr0 \
counter \
accept
+nft \
+-ae insert \
+rule \
+ip \
+libvirt_network \
+postroute_mangle \
+oif \
+virbr0 \
+udp \
+dport \
+68 \
+counter \
+udp \
+checksum \
+set \
+0
--
2.47.0
4 months, 4 weeks
[PATCH] qemu: Fix maximum physical address size in baseline CPU
by Jiri Denemark
We should include maximum physical address size in the CPU definition
created by virConnectBaselineHypervisorCPU only if we know the value for
all input CPUs. Otherwise we would create a CPU definition that is not
usable on all hosts from which we gathered the CPU info.
https://issues.redhat.com/browse/RHEL-24850
Signed-off-by: Jiri Denemark <jdenemar(a)redhat.com>
---
src/qemu/qemu_driver.c | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c
index 72211da137..72a9542c0b 100644
--- a/src/qemu/qemu_driver.c
+++ b/src/qemu/qemu_driver.c
@@ -11914,10 +11914,12 @@ qemuConnectBaselineHypervisorCPU(virConnectPtr conn,
goto cleanup;
for (i = 0; i < ncpus; i++) {
- if (!cpus[i]->addr || cpus[i]->addr->limit == 0)
+ if (!cpus[i]->addr || cpus[i]->addr->limit == 0) {
+ physAddrSize = 0;
continue;
+ }
- if (physAddrSize == 0 || cpus[i]->addr->limit < physAddrSize)
+ if (i == 0 || cpus[i]->addr->limit < physAddrSize)
physAddrSize = cpus[i]->addr->limit;
}
--
2.47.0
5 months
[Call for Presentations] FOSDEM 2025: Virt & IaaS devroom
by Kashyap Chamarthy
(Cross-posted to KVM, Rust-VMM, QEMU, and libvirt lists.)
Hi,
The CFP for the 'Virt & IaaS' (Infrastructure as a Service) 2025
"devroom" is out[+].
- Where? — Brussels, Belgium
- When? — 02 Feb (Sunday) 2025
- Talk submission deadline — 01 Dec 2024
(The CFP submission on the FOSDEM list[+] says 08 Dec 2024 is the
submission deadline in its intro paragraph. But I clarified it with
Piotr Kliczewski, the deddline is *01* Dec 2024 and _not_ 08th Dec;
I updated it below.)
========================================================================
This devroom is a collaborative effort, and is organized by dedicated
folks from projects such as OpenStack, Xen Project, KubeVirt, QEMU, KVM,
and Foreman. We invite everyone involved in these fields to submit your
proposals by December *1st* 2024.
Important Dates
---------------
Submission deadline: 1st December 2024
Acceptance notifications: 10th December 2024
Final schedule announcement: 15th December 2024
Devroom: 2nd February 2025
About the Devroom
-----------------
The Virtualization & IaaS devroom will feature session topics such as open
source hypervisors or virtual machine managers such as Xen Project, KVM,
bhyve and VirtualBox as well as Infrastructure-as-a-Service projects such
as KubeVirt, Apache CloudStack, OpenStack, QEMU and OpenNebula.
This devroom will host presentations that focus on topics of shared
interest, such as KVM; libvirt; shared storage; virtualized networking;
cloud security; clustering and high availability; interfacing with multiple
hypervisors; hyperconverged deployments; and scaling across hundreds or
thousands of servers.
Presentations in this devroom will be aimed at developers working on these
platforms who are looking to collaborate and improve shared infrastructure
or solve common problems. We seek topics that encourage dialog between
projects and continued work post-FOSDEM.
Submit Your Proposal
--------------------
All submissions must be made via the Pretalx event planning site[1]. It is
a new submission system so you will need to create an account. If you
submitted proposals for FOSDEM in previous years, you won’t be able to use
your existing account.
During submission please make sure to select Virtualization and Cloud
infrastructure from the Track list. Please provide a meaningful abstract
and description of your proposed session.
Submission Guidelines
---------------------
We expect more proposals than we can possibly accept, so it is vitally
important that you submit your proposal on or before the deadline. Late
submissions are unlikely to be considered.
All presentation slots are 30 minutes, with 20 minutes planned for
presentations, and 10 minutes for Q&A.
All presentations will be recorded and made available under Creative
Commons licenses. In the Submission notes field, please indicate that you
agree that your presentation will be licensed under the CC-By-SA-4.0 or
CC-By-4.0 license and that you agree to have your presentation recorded.
For example:
"If my presentation is accepted for FOSDEM, I hereby agree to license all
recordings, slides, and other associated materials under the Creative
Commons Attribution Share-Alike 4.0 International License.
Sincerely,
<NAME>."
In the Submission notes field, please also confirm that if your talk is
accepted, you will be able to attend FOSDEM and deliver your presentation.
We will not consider proposals from prospective speakers who are unsure
whether they will be able to secure funds for travel and lodging to attend
FOSDEM. (Sadly, we are not able to offer travel funding for prospective
speakers.)
Code of Conduct
---------------
Following the release of the updated code of conduct for FOSDEM[3], we'd
like to remind all speakers and attendees that all of the presentations and
discussions in our devroom are held under the guidelines set in the CoC and
we expect attendees, speakers, and volunteers to follow the CoC at all
times.
If you submit a proposal and it is accepted, you will be required to
confirm that you accept the FOSDEM CoC. If you have any questions about the
CoC or wish to have one of the devroom organizers review your presentation
slides or any other content for CoC compliance, please email us and we will
do our best to assist you.
Questions?
----------
If you have any questions about this devroom, please send your questions to
our devroom mailing list. You can also subscribe to the list to receive
updates about important dates, session announcements, and to connect with
other attendees.
See you all at FOSDEM!
[1] https://fosdem.org/submit
[2] virtualization-devroom-manager at fosdem.org
[3] https://fosdem.org/2025/practical/conduct/
========================================================================
[+] https://lists.fosdem.org/pipermail/fosdem/2024q4/003574.html
--
/kashyap
5 months