[PATCH] util: directly query KVM for TSC scaling support
by Daniel P. Berrangé
We currently query the host MSRs to determine if TSC scaling is
supported. This works OK when running privileged, since we can open
/dev/cpu/0/msr. When unprivileged we fall back to querying
MSRs from /dev/kvm. This is incorrect because /dev/kvm only
reports accurate info for MSRs that are valid to use from inside
a guest. The TSC scaling support MSR is not, thus we always end
up reporting lack of TSC scaling when unprivileged.
The solution to this is easy, because KVM can directly report
whether TSC scaling is available, which matches what QEMU will
do at startup.
https://gitlab.com/libvirt/libvirt/-/issues/188
Reported-by: Roman Mohr <rmohr(a)redhat.com>
Signed-off-by: Daniel P. Berrangé <berrange(a)redhat.com>
---
src/util/virhostcpu.c | 19 +++++++------------
1 file changed, 7 insertions(+), 12 deletions(-)
diff --git a/src/util/virhostcpu.c b/src/util/virhostcpu.c
index 7aa92ad11d..f140610b47 100644
--- a/src/util/virhostcpu.c
+++ b/src/util/virhostcpu.c
@@ -1349,11 +1349,10 @@ virHostCPUGetMSR(unsigned long index,
virHostCPUTscInfo *
virHostCPUGetTscInfo(void)
{
- virHostCPUTscInfo *info;
+ g_autofree virHostCPUTscInfo *info = g_new0(virHostCPUTscInfo, 1);
VIR_AUTOCLOSE kvmFd = -1;
VIR_AUTOCLOSE vmFd = -1;
VIR_AUTOCLOSE vcpuFd = -1;
- uint64_t msr = 0;
int rc;
if ((kvmFd = open(KVM_DEVICE, O_RDONLY)) < 0) {
@@ -1378,23 +1377,19 @@ virHostCPUGetTscInfo(void)
_("Unable to probe TSC frequency"));
return NULL;
}
-
- info = g_new0(virHostCPUTscInfo, 1);
-
info->frequency = rc * 1000ULL;
- if (virHostCPUGetMSR(VMX_PROCBASED_CTLS2_MSR, &msr) == 0) {
- /* High 32 bits of the MSR value indicate whether specific control
- * can be set to 1. */
- msr >>= 32;
-
- info->scaling = virTristateBoolFromBool(!!(msr & VMX_USE_TSC_SCALING));
+ if ((rc = ioctl(kvmFd, KVM_CHECK_EXTENSION, KVM_CAP_TSC_CONTROL)) < 0) {
+ virReportSystemError(errno, "%s",
+ _("Unable to query TSC scaling support"));
+ return NULL;
}
+ info->scaling = rc ? VIR_TRISTATE_BOOL_YES : VIR_TRISTATE_BOOL_NO;
VIR_DEBUG("Detected TSC frequency %llu Hz, scaling %s",
info->frequency, virTristateBoolTypeToString(info->scaling));
- return info;
+ return g_steal_pointer(&info);
}
#else
--
2.31.1
3 years, 4 months
[PATCH] Revert "remote: remove probing logic from virtproxyd dispatcher"
by Daniel P. Berrangé
This reverts commit 05bd8db60b35b7706100d9cbbaeb13992965e0f2.
It is true that the remote driver client now contains logic for probing
the driver to connect to when using modular daemons. This logic, however,
only runs when the remote driver is NOT running inside a daemon since we
don't want it activated inside libvirtd. Since the same remote driver
build is used in all daemons, we can't rely on it in virtproxyd either.
Thus we need to keep the virtproxyd probing logic.
Signed-off-by: Daniel P. Berrangé <berrange(a)redhat.com>
---
src/remote/remote_daemon_dispatch.c | 77 +++++++++++++++++++++++++++++
1 file changed, 77 insertions(+)
diff --git a/src/remote/remote_daemon_dispatch.c b/src/remote/remote_daemon_dispatch.c
index 65aa20f7d1..36d4d00b79 100644
--- a/src/remote/remote_daemon_dispatch.c
+++ b/src/remote/remote_daemon_dispatch.c
@@ -1948,6 +1948,73 @@ void *remoteClientNew(virNetServerClient *client,
/*----- Functions. -----*/
+#ifdef VIRTPROXYD
+/*
+ * When running in virtproxyd regular auto-probing of drivers
+ * does not work as we don't have any drivers present (except
+ * stateless ones inside libvirt.so). All the interesting
+ * drivers are in separate daemons. Thus when we get a NULL
+ * URI we need to simulate probing that virConnectOpen would
+ * previously do. We use the existence of the UNIX domain
+ * socket as our hook for probing.
+ *
+ * This assumes no stale sockets left over from a now dead
+ * daemon, but that's reasonable since libvirtd unlinks
+ * sockets it creates on shutdown, or uses systemd activation
+ *
+ * We only try to probe for primary hypervisor drivers,
+ * not the secondary drivers.
+ */
+static int
+remoteDispatchProbeURI(bool readonly,
+ char **probeduri)
+{
+ g_autofree char *driver = NULL;
+ const char *suffix;
+ *probeduri = NULL;
+ VIR_DEBUG("Probing for driver daemon sockets");
+
+ /*
+ * If running root, either the daemon is running and the socket
+ * exists, or we're using socket activation so the socket exists
+ * too.
+ *
+ * If running non-root, the daemon may or may not already be
+ * running, and socket activation probably isn't relevant.
+ * So if no viable socket exists, we need to check which daemons
+ * are actually installed. This is not a big deal as only QEMU &
+ * VBox run as non-root, anyway.
+ */
+ if (geteuid() != 0) {
+ if (remoteProbeSessionDriverFromSocket(false, &driver) < 0)
+ return -1;
+
+ if (driver == NULL &&
+ remoteProbeSessionDriverFromBinary(&driver) < 0)
+ return -1;
+
+ suffix = "session";
+ } else {
+ if (remoteProbeSystemDriverFromSocket(readonly, &driver) < 0)
+ return -1;
+
+ suffix = "system";
+ }
+
+ /* Even if we didn't probe any socket, we won't
+ * return error. Just let virConnectOpen's normal
+ * logic run which will likely return an error anyway
+ */
+ if (!driver)
+ return 0;
+
+ *probeduri = g_strdup_printf("%s:///%s", driver, suffix);
+ VIR_DEBUG("Probed URI %s for driver %s", *probeduri, driver);
+ return 0;
+}
+#endif /* VIRTPROXYD */
+
+
static int
remoteDispatchConnectOpen(virNetServer *server G_GNUC_UNUSED,
virNetServerClient *client,
@@ -1956,6 +2023,9 @@ remoteDispatchConnectOpen(virNetServer *server G_GNUC_UNUSED,
struct remote_connect_open_args *args)
{
const char *name;
+#ifdef VIRTPROXYD
+ g_autofree char *probeduri = NULL;
+#endif
unsigned int flags;
struct daemonClientPrivate *priv = virNetServerClientGetPrivateData(client);
int rv = -1;
@@ -1984,6 +2054,13 @@ remoteDispatchConnectOpen(virNetServer *server G_GNUC_UNUSED,
priv->readonly = flags & VIR_CONNECT_RO;
#ifdef VIRTPROXYD
+ if (!name || STREQ(name, "")) {
+ if (remoteDispatchProbeURI(priv->readonly, &probeduri) < 0)
+ goto cleanup;
+
+ name = probeduri;
+ }
+
preserveIdentity = true;
#endif /* VIRTPROXYD */
--
2.31.1
3 years, 4 months
[libvirt PATCHv2 0/6] kill whole virtiofsd process group (virtio-fs epopee)
by Ján Tomko
v2 of:
https://listman.redhat.com/archives/libvir-list/2021-June/msg00570.html
Ján Tomko (6):
virProcessKillPainfullyDelay: use 'rc' variable
util: Introduce virProcessGroupKill
util: introduce virProcessGroupGet
util: virPidFileForceCleanupPath: add group argument
qemu: virtiofs: kill the whole process group
util: fix typo
src/libvirt_private.syms | 1 +
src/qemu/qemu_process.c | 3 ++-
src/qemu/qemu_virtiofs.c | 2 +-
src/util/virpidfile.c | 15 ++++++++++--
src/util/virpidfile.h | 2 ++
src/util/virprocess.c | 53 +++++++++++++++++++++++++++++++++++-----
src/util/virprocess.h | 5 +++-
7 files changed, 70 insertions(+), 11 deletions(-)
--
2.31.1
3 years, 4 months
[libvirt PATCH 0/3] add support for Fibre Channel VMID
by Pavel Hrdina
Pavel Hrdina (3):
vircgroup: introduce virCgroupGetInode function
conf: introduce support for Fibre Channel VMID
qemu: implement support for Fibre Channel VMID
docs/formatdomain.rst | 13 ++++++++++
docs/schemas/domaincommon.rng | 11 +++++++++
src/conf/domain_conf.c | 5 ++++
src/conf/domain_conf.h | 2 ++
src/conf/domain_validate.c | 19 +++++++++++++++
src/libvirt_private.syms | 1 +
src/qemu/qemu_cgroup.c | 32 +++++++++++++++++++++++++
src/util/vircgroup.c | 37 +++++++++++++++++++++++++++++
src/util/vircgroup.h | 2 ++
tests/genericxml2xmlindata/vmid.xml | 19 +++++++++++++++
tests/genericxml2xmltest.c | 2 ++
11 files changed, 143 insertions(+)
create mode 100644 tests/genericxml2xmlindata/vmid.xml
--
2.31.1
3 years, 4 months
Re: [PATCH for-6.1? v2 5/7] job: Add job_cancel_requested()
by Kevin Wolf
[ Peter, the question for you is at the end. ]
Am 04.08.2021 um 10:07 hat Max Reitz geschrieben:
> On 03.08.21 16:25, Kevin Wolf wrote:
> > Am 26.07.2021 um 16:46 hat Max Reitz geschrieben:
> > > Most callers of job_is_cancelled() actually want to know whether the job
> > > is on its way to immediate termination. For example, we refuse to pause
> > > jobs that are cancelled; but this only makes sense for jobs that are
> > > really actually cancelled.
> > >
> > > A mirror job that is cancelled during READY with force=false should
> > > absolutely be allowed to pause. This "cancellation" (which is actually
> > > a kind of completion) may take an indefinite amount of time, and so
> > > should behave like any job during normal operation. For example, with
> > > on-target-error=stop, the job should stop on write errors. (In
> > > contrast, force-cancelled jobs should not get write errors, as they
> > > should just terminate and not do further I/O.)
> > >
> > > Therefore, redefine job_is_cancelled() to only return true for jobs that
> > > are force-cancelled (which as of HEAD^ means any job that interprets the
> > > cancellation request as a request for immediate termination), and add
> > > job_cancel_request() as the general variant, which returns true for any
> > > jobs which have been requested to be cancelled, whether it be
> > > immediately or after an arbitrarily long completion phase.
> > >
> > > Buglink: https://gitlab.com/qemu-project/qemu/-/issues/462
> > > Signed-off-by: Max Reitz <mreitz(a)redhat.com>
> > > ---
> > > include/qemu/job.h | 8 +++++++-
> > > block/mirror.c | 10 ++++------
> > > job.c | 7 ++++++-
> > > 3 files changed, 17 insertions(+), 8 deletions(-)
> > >
> > > diff --git a/include/qemu/job.h b/include/qemu/job.h
> > > index 8aa90f7395..032edf3c5f 100644
> > > --- a/include/qemu/job.h
> > > +++ b/include/qemu/job.h
> > > @@ -436,9 +436,15 @@ const char *job_type_str(const Job *job);
> > > /** Returns true if the job should not be visible to the management layer. */
> > > bool job_is_internal(Job *job);
> > > -/** Returns whether the job is scheduled for cancellation. */
> > > +/** Returns whether the job is being cancelled. */
> > > bool job_is_cancelled(Job *job);
> > > +/**
> > > + * Returns whether the job is scheduled for cancellation (at an
> > > + * indefinite point).
> > > + */
> > > +bool job_cancel_requested(Job *job);
> > I don't think non-force blockdev-cancel for mirror should actually be
> > considered cancellation, so what is the question that this function
> > answers?
> >
> > "Is this a cancelled job, or a mirror block job that is supposed to
> > complete soon, but only if it doesn't switch over the users to the
> > target on completion"?
>
> Well, technically yes, but it was more intended as “Has the user ever
> invoked (block-)job-cancel on this job?”.
I understand this, but is this much more useful to know than "Has the
user ever called HMP 'change'?", if you know what I mean?
> > Is this ever a reasonable question to ask, except maybe inside the
> > mirror implementation itself?
>
> I asked myself the same for v3, but found two places in job.c where I
> would like to keep it:
>
> First, there’s an assertion in job_completed_txn_abort(). All jobs
> other than @job have been force-cancelled, and so job_is_cancelled()
> would be true for them. As for @job itself, the function is mostly
> called when the job’s return value is not 0, but a soft-cancelled
> mirror does have a return value of 0 and so would not end up in that
> function.
> But job_cancel() invokes job_completed_txn_abort() if the job has been
> deferred to the main loop, which mostly correlates with the job having
> been completed (in which case the assertion is skipped), but not 100 %
> (there’s a small window between setting deferred_to_main_loop and the
> job changing to a completed state).
> So I’d prefer to keep the assertion as-is functionally, i.e. to only
> check job->cancelled.
Well, you don't. It's still job_is_cancelled() after this patch.
So the scenario you're concerned about is a job that has just finished
successfully (job->ret = 0) and then gets a cancel request?
With force=false, I'm pretty sure the code is wrong anyway because
calling job_completed_txn_abort() is not the right response. It should
return an error because you're trying to complete twice, possibly with
conflicting completion modes. Second best is just ignoring the cancel
request because we obviously already fulfilled the request of completing
the job (the completion mode might be different, though).
With force=true, arguably still letting the job fail is correct.
However, letting it fail involves more than just letting the transaction
fail. We would have to call job_update_rc() as well so that instead of
reporting success for the job, ECANCELED is returned and the job
transitions to JOB_STATUS_ABORTING (after which job_is_completed()
returns true).
So, just bugs to be fixed.
After this, I think we could even assert(job->ret != 0 ||
job->status == JOB_STATUS_PENDING) in job_completed_txn_abort().
ret == 0 can only happen when called from job_do_finalize(), when the
job is only failing because other jobs in the same transaction have
failed in their .prepare callback.
> Second, job_complete() refuses to let a job complete that has been
> cancelled. This function is basically only invoked by the user
> (through qmp_block_job_complete()/qmp_job_complete(), or
> job_complete_sync(), which comes from qemu-img), so I believe that it
> should correspond to the external interface we have right now; i.e.,
> if the user has invoked (block-)job-cancel at one point,
> job_complete() should generally return an error.
True. But it should also return an error if the user has invoked
job-complete at some point. The distinction between complete and
non-force cancel doesn't make sense there.
And cancelling with force=false should fail, too, when either job-cancel
or job-complete was called for the job before.
> > job_complete() is the only function outside of mirror that seems to use
> > it. But even there, it feels wrong to make a difference. Either we
> > accept redundant completion requests, or we don't. It doesn't really
> > matter how the job reconfigures the graph on completion. (Also, I feel
> > this should really have been part of the state machine, but I'm not sure
> > if we want to touch it now...)
>
> Well, yes, I don’t think it makes a difference because I don’t think
> anyone will first tell the job via block-job-cancel to complete
> without pivoting, and then change their mind and call
> block-job-complete after all. (Not least because that’s an error
> pre-series.)
Right, I'm just arguing that we shouldn't allow the opposite order
either. Currently I think we do, and it's buggy, as explained above.
> Also, I’m not even sure whether completing after a soft cancel request
> works. I don’t think any of our code accounts for such a case, so I’d
> rather avoid allowing it if there’s no need to allow it anyway.
Yes, definitely avoid it. We should allow only one completion request
(be it with job-complete or block-job-cancel) and return an error for
all future completion requests for the same job.
We could in theory keep allowing redundant completion requests when the
completion mode doesn't conflict, but I don't see the point of that.
Unless libvirt can actually issue multiple completion requests (again,
this includes both (block-)job-complete and non-force block-job-cancel
for mirror) for the same block job - Peter, I hope it doesn't?
Kevin
3 years, 4 months
[libvirt PATCH 0/6] rpm: enable modular daemons by default in Fedora 35 / RHEL 9
by Daniel P. Berrangé
Daniel P. Berrangé (6):
rpm: macroize logic for restarting daemons post-transaction
rpm: restart virtnwfilter/virnetworkd if configs change
rpm: restart modular daemons on upgrade
rpm: macroize logic for enabling/disabling daemons post/postun-install
rpm: handle enabling/disabling modular daemons post/postun-install
rpm: use direct remote connection for Fedora >= 35 / RHEL >= 9
libvirt.spec.in | 225 ++++++++++++++++++++++++++++++++++++++++--------
1 file changed, 190 insertions(+), 35 deletions(-)
--
2.31.1
3 years, 4 months
[PATCH] qemu_migration: check for interface type 'hostdev'
by Kristina Hanicova
When we try to migrate vm, we check if it contains only devices
that are able to migrate. If a hostdev device is not able to
migrate we raise an error with <hostdev/>, but it can actually be
<interface/>, so we need to check if hostdev device was created
by us from interface and show the right error message.
Resolves: https://bugzilla.redhat.com/show_bug.cgi?id=1942315
Signed-off-by: Kristina Hanicova <khanicov(a)redhat.com>
---
src/qemu/qemu_migration.c | 12 +++++++++---
1 file changed, 9 insertions(+), 3 deletions(-)
diff --git a/src/qemu/qemu_migration.c b/src/qemu/qemu_migration.c
index 4d651aeb1a..34eee9c8b6 100644
--- a/src/qemu/qemu_migration.c
+++ b/src/qemu/qemu_migration.c
@@ -1272,9 +1272,15 @@ qemuMigrationSrcIsAllowedHostdev(const virDomainDef *def)
}
/* all other PCI hostdevs can't be migrated */
- virReportError(VIR_ERR_OPERATION_UNSUPPORTED,
- _("cannot migrate a domain with <hostdev mode='subsystem' type='%s'>"),
- virDomainHostdevSubsysTypeToString(hostdev->source.subsys.type));
+ if (hostdev->parentnet) {
+ virReportError(VIR_ERR_OPERATION_UNSUPPORTED,
+ _("cannot migrate a domain with <interface type='%s'>"),
+ virDomainNetTypeToString(hostdev->parentnet->type));
+ } else {
+ virReportError(VIR_ERR_OPERATION_UNSUPPORTED,
+ _("cannot migrate a domain with <hostdev mode='subsystem' type='%s'>"),
+ virDomainHostdevSubsysTypeToString(hostdev->source.subsys.type));
+ }
return false;
case VIR_DOMAIN_HOSTDEV_SUBSYS_TYPE_LAST:
--
2.31.1
3 years, 4 months
[PATCH v4 0/4] Implement virDomainGetMessages for test driver
by Luke Yue
v4:
- Move testDomainObjCheckTaint to testParseDomains()
- Add CPU tainted and deprecation check
- Add a new xml with more tainted configs
Luke Yue (4):
conf: domain: Introduce and use virDomainObjGetMessages()
test_driver: Implement virDomainGetMessages
test_driver: Introduce testDomainObjCheckTaint
examples: test: Add a new test xml with more tainted configs for
testing
examples/xml/test/testdomfc5.xml | 51 +++++++++++++++
examples/xml/test/testnode.xml | 1 +
examples/xml/test/testnodeinline.xml | 51 +++++++++++++++
src/conf/domain_conf.c | 53 +++++++++++++++
src/conf/domain_conf.h | 5 ++
src/libvirt_private.syms | 1 +
src/qemu/qemu_driver.c | 34 +---------
src/test/test_driver.c | 98 ++++++++++++++++++++++++++++
tests/virshtest.c | 52 +++++++++++++--
9 files changed, 306 insertions(+), 40 deletions(-)
create mode 100644 examples/xml/test/testdomfc5.xml
--
2.32.0
3 years, 4 months
[PATCH] vmx: Parse vm.genid
by Michal Privoznik
The VMware metadata file contains a genid, but we are not parsing
it and thus not reporting it in the domain XML. However, it's not as
straightforward as one might think. The UUID reported by VMware
is not in its usual string form, but split into two signed long
longs. That means, we have to do a bit of trickery when parsing.
But looking around it's the same magic that libguestfs does:
https://github.com/libguestfs/virt-v2v/blob/master/v2v/input_vmx.ml#L421
It's also explained by Rich on qemu-devel:
https://lists.nongnu.org/archive/html/qemu-devel/2018-07/msg02019.html
Resolves: https://bugzilla.redhat.com/show_bug.cgi?id=1598348
Signed-off-by: Michal Privoznik <mprivozn(a)redhat.com>
---
I've successfully run vmx2xmltest on an s390x machine, which means that
there shouldn't be any endianness problem.
src/vmx/vmx.c | 30 +++++++++++++++++++
.../vmx2xml-esx-in-the-wild-10.xml | 1 +
2 files changed, 31 insertions(+)
diff --git a/src/vmx/vmx.c b/src/vmx/vmx.c
index 1cd5a82227..04eabff18a 100644
--- a/src/vmx/vmx.c
+++ b/src/vmx/vmx.c
@@ -1337,6 +1337,32 @@ virVMXConfigScanResultsCollector(const char* name,
}
+static int
+virVMXParseGenID(virConf *conf,
+ virDomainDef *def)
+{
+ long long vmid[2] = { 0 };
+ g_autofree char *uuidstr = NULL;
+
+ if (virVMXGetConfigLong(conf, "vm.genid", &vmid[0], 0, true) < 0 ||
+ virVMXGetConfigLong(conf, "vm.genidX", &vmid[1], 0, true) < 0)
+ return -1;
+
+ if (vmid[0] == 0 && vmid[1] == 0)
+ return 0;
+
+ uuidstr = g_strdup_printf("%.16llx%.16llx", vmid[0], vmid[1]);
+ if (virUUIDParse(uuidstr, def->genid) < 0) {
+ virReportError(VIR_ERR_INTERNAL_ERROR,
+ _("Could not parse UUID from string '%s'"), uuidstr);
+ return -1;
+ }
+ def->genidRequested = true;
+
+ return 0;
+}
+
+
/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
* VMX -> Domain XML
@@ -1466,6 +1492,10 @@ virVMXParseConfig(virVMXContext *ctx,
}
}
+ /* vmx:vm.genid + vm.genidX -> def:genid */
+ if (virVMXParseGenID(conf, def) < 0)
+ goto cleanup;
+
/* vmx:annotation -> def:description */
if (virVMXGetConfigString(conf, "annotation", &def->description,
true) < 0) {
diff --git a/tests/vmx2xmldata/vmx2xml-esx-in-the-wild-10.xml b/tests/vmx2xmldata/vmx2xml-esx-in-the-wild-10.xml
index b8c522af1f..47ed637920 100644
--- a/tests/vmx2xmldata/vmx2xml-esx-in-the-wild-10.xml
+++ b/tests/vmx2xmldata/vmx2xml-esx-in-the-wild-10.xml
@@ -1,6 +1,7 @@
<domain type='vmware'>
<name>w2019biosvmware</name>
<uuid>421a6177-5aa9-abb7-5924-fc376c18a1b4</uuid>
+ <genid>13c67c91-9f47-526f-b0d6-e4dd2e4bb4f9</genid>
<memory unit='KiB'>4194304</memory>
<currentMemory unit='KiB'>4194304</currentMemory>
<vcpu placement='static'>2</vcpu>
--
2.31.1
3 years, 4 months