[libvirt] [PATCH] Allow a per-PCI passthrough device permissive attribute
by Chris Lalancette
Currently there is a global tag to let the administrator
turn off system-wide ACS checking when doing PCI device
passthrough. However, this is too coarse-grained of an
attribute, since it doesn't allow setups where certain
guests are trusted while other ones are untrusted. Allow
more complicated setups by making the device checking
a per-device setting.
The more detailed explanation of why this is necessary
delves deep into PCIe internals. Ideally we'd like
to be able to probe devices and figure out whether it
is safe to assign them. In practice, this isn't possible
because PCIe allows devices to have "hidden" bridges
that software can't discover. If you were to have
two devices assigned to two different domains behind
one of these hidden bridges, they could do P2P traffic
and bypass all of the VT-d/IOMMU checks.
The next thing we could try to do is to have a whitelist
of devices that we know to be safe. For instance, instead
of a "hidden" bridge, PCI devices can multiplex functions
instead, which causes all traffic to head to an upstream
bridge before P2P can take place. Additionally, some
"hidden" PCI bridges may have ACS on-board. In both of
these cases it's safe to passthrough the device(s), since
they can't P2P without the IOMMU getting involved.
However, even if we did have a whitelist, I think we still
need a permissive attribute. For one thing, the whitelist
will always be out of date with respect to new hardware,
so we'd need to allow administrators to temporarily
override the whitelist restriction until a new version of
the whitelist came out. Also, we want to support the case
where the administrator knows it is safe to assign possibly
unsafe devices to a domain he trusts.
Signed-off-by: Chris Lalancette <clalance(a)redhat.com>
---
docs/schemas/domain.rng | 6 ++++++
src/conf/domain_conf.c | 14 +++++++++++---
src/conf/domain_conf.h | 1 +
src/libvirt_private.syms | 2 ++
src/qemu/qemu_driver.c | 1 +
src/util/pci.c | 13 ++++++++++++-
src/util/pci.h | 3 +++
7 files changed, 36 insertions(+), 4 deletions(-)
diff --git a/docs/schemas/domain.rng b/docs/schemas/domain.rng
index 827ff6f..9f90f4d 100644
--- a/docs/schemas/domain.rng
+++ b/docs/schemas/domain.rng
@@ -1175,6 +1175,12 @@
<value>no</value>
</choice>
</attribute>
+ <attribute name="permissive">
+ <choice>
+ <value>yes</value>
+ <value>no</value>
+ </choice>
+ </attribute>
</optional>
<group>
<element name="source">
diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c
index e548d1d..899967d 100644
--- a/src/conf/domain_conf.c
+++ b/src/conf/domain_conf.c
@@ -2929,7 +2929,7 @@ virDomainHostdevDefParseXML(virConnectPtr conn,
xmlNodePtr cur;
virDomainHostdevDefPtr def;
- char *mode, *type = NULL, *managed = NULL;
+ char *mode, *type = NULL, *managed = NULL, *permissive = NULL;
if (VIR_ALLOC(def) < 0) {
virReportOOMError(conn);
@@ -2968,6 +2968,13 @@ virDomainHostdevDefParseXML(virConnectPtr conn,
VIR_FREE(managed);
}
+ permissive = virXMLPropString(node, "permissive");
+ if (permissive != NULL) {
+ if (STREQ(permissive, "yes"))
+ def->permissive = 1;
+ VIR_FREE(permissive);
+ }
+
cur = node->children;
while (cur != NULL) {
if (cur->type == XML_ELEMENT_NODE) {
@@ -5243,8 +5250,9 @@ virDomainHostdevDefFormat(virConnectPtr conn,
return -1;
}
- virBufferVSprintf(buf, " <hostdev mode='%s' type='%s' managed='%s'>\n",
- mode, type, def->managed ? "yes" : "no");
+ virBufferVSprintf(buf, " <hostdev mode='%s' type='%s' managed='%s' permissive='%s'>\n",
+ mode, type, def->managed ? "yes" : "no",
+ def->permissive ? "yes" : "no");
virBufferAddLit(buf, " <source>\n");
if (def->source.subsys.type == VIR_DOMAIN_HOSTDEV_SUBSYS_TYPE_USB) {
diff --git a/src/conf/domain_conf.h b/src/conf/domain_conf.h
index 7be090d..57ab4b4 100644
--- a/src/conf/domain_conf.h
+++ b/src/conf/domain_conf.h
@@ -475,6 +475,7 @@ typedef virDomainHostdevDef *virDomainHostdevDefPtr;
struct _virDomainHostdevDef {
int mode; /* enum virDomainHostdevMode */
unsigned int managed : 1;
+ unsigned int permissive : 1;
union {
struct {
int type; /* enum virDomainHostdevBusType */
diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms
index d56fb7d..49e222e 100644
--- a/src/libvirt_private.syms
+++ b/src/libvirt_private.syms
@@ -441,6 +441,8 @@ pciWaitForDeviceCleanup;
pciResetDevice;
pciDeviceSetManaged;
pciDeviceGetManaged;
+pciDeviceSetPermissive;
+pciDeviceGetPermissive;
pciDeviceListNew;
pciDeviceListFree;
pciDeviceListAdd;
diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c
index 5bf6743..9848f1c 100644
--- a/src/qemu/qemu_driver.c
+++ b/src/qemu/qemu_driver.c
@@ -2137,6 +2137,7 @@ qemuGetPciHostDeviceList(virConnectPtr conn,
}
pciDeviceSetManaged(dev, hostdev->managed);
+ pciDeviceSetPermissive(dev, hostdev->permissive);
}
return list;
diff --git a/src/util/pci.c b/src/util/pci.c
index 0274806..42a0be6 100644
--- a/src/util/pci.c
+++ b/src/util/pci.c
@@ -64,6 +64,7 @@ struct _pciDevice {
unsigned has_flr : 1;
unsigned has_pm_reset : 1;
unsigned managed : 1;
+ unsigned permissive : 1;
};
struct _pciDeviceList {
@@ -1076,6 +1077,16 @@ unsigned pciDeviceGetManaged(pciDevice *dev)
return dev->managed;
}
+void pciDeviceSetPermissive(pciDevice *dev, unsigned permissive)
+{
+ dev->permissive = !!permissive;
+}
+
+unsigned pciDeviceGetPermissive(pciDevice *dev)
+{
+ return dev->permissive;
+}
+
pciDeviceList *
pciDeviceListNew(virConnectPtr conn)
{
@@ -1353,7 +1364,7 @@ int pciDeviceIsAssignable(virConnectPtr conn,
return 0;
if (ret) {
- if (!strict_acs_check) {
+ if (!strict_acs_check || dev->permissive) {
VIR_DEBUG("%s %s: strict ACS check disabled; device assignment allowed",
dev->id, dev->name);
} else {
diff --git a/src/util/pci.h b/src/util/pci.h
index e6ab137..3f98374 100644
--- a/src/util/pci.h
+++ b/src/util/pci.h
@@ -44,6 +44,9 @@ int pciResetDevice (virConnectPtr conn,
void pciDeviceSetManaged(pciDevice *dev,
unsigned managed);
unsigned pciDeviceGetManaged(pciDevice *dev);
+void pciDeviceSetPermissive(pciDevice *dev,
+ unsigned permissive);
+unsigned pciDeviceGetPermissive(pciDevice *dev);
pciDeviceList *pciDeviceListNew (virConnectPtr conn);
void pciDeviceListFree (virConnectPtr conn,
--
1.6.6
14 years, 10 months
[libvirt] [PATCH 1/3] Clarify why some controllers don't generate -device string in QEMU driver
by Matthew Booth
The QEMU driver contained code to generate a -device string for piix4-ide, but
wasn't using it. This change removes this string generation. It also adds a
comment explaining why IDE and FDC controllers don't generate -device strings.
The change also generates an error if a sata controller is specified for a QEMU
domain, as this isn't supported.
* src/qemu/qemu_conf.c: Remove VIR_DOMAIN_CONTROLLER_TYPE_IDE handler in
qemuBuildControllerDevStr(). Ignore IDE and FDC
controllers. Error if SATA controller is discovered. Add
comments.
---
src/qemu/qemu_conf.c | 26 ++++++++++++++++++--------
1 files changed, 18 insertions(+), 8 deletions(-)
diff --git a/src/qemu/qemu_conf.c b/src/qemu/qemu_conf.c
index f4a6c08..3b7793f 100644
--- a/src/qemu/qemu_conf.c
+++ b/src/qemu/qemu_conf.c
@@ -2121,11 +2121,8 @@ qemuBuildControllerDevStr(virDomainControllerDefPtr def)
virBufferVSprintf(&buf, ",id=scsi%d", def->idx);
break;
+ /* We always get an IDE controller, whether we want it or not. */
case VIR_DOMAIN_CONTROLLER_TYPE_IDE:
- virBufferAddLit(&buf, "piix4-ide");
- virBufferVSprintf(&buf, ",id=ide%d", def->idx);
- break;
-
default:
goto error;
}
@@ -3141,16 +3138,29 @@ int qemudBuildCommandLine(virConnectPtr conn,
if (qemuCmdFlags & QEMUD_CMD_FLAG_DEVICE) {
for (i = 0 ; i < def->ncontrollers ; i++) {
- char *scsi;
- if (def->controllers[i]->type != VIR_DOMAIN_CONTROLLER_TYPE_SCSI)
+ virDomainControllerDefPtr cont = def->controllers[i];
+
+ /* We don't add an explicit IDE or FD controller because the
+ * provided PIIX4 device already includes one. It isn't possible to
+ * remove the PIIX4. */
+ if (cont->type == VIR_DOMAIN_CONTROLLER_TYPE_IDE ||
+ cont->type == VIR_DOMAIN_CONTROLLER_TYPE_FDC)
continue;
+ /* QEMU doesn't implement a SATA driver */
+ if (cont->type == VIR_DOMAIN_CONTROLLER_TYPE_SATA) {
+ qemudReportError(conn, NULL, NULL, VIR_ERR_NO_SUPPORT,
+ "%s", _("SATA is not supported with this QEMU binary"));
+ goto error;
+ }
+
ADD_ARG_LIT("-device");
- if (!(scsi = qemuBuildControllerDevStr(def->controllers[i])))
+ char *devstr;
+ if (!(devstr = qemuBuildControllerDevStr(def->controllers[i])))
goto no_memory;
- ADD_ARG(scsi);
+ ADD_ARG(devstr);
}
}
--
1.6.6
14 years, 10 months
[libvirt] Libvirt guide draft published on libvirt.org
by David Jorm
I am a tech writer who recently joined the Red Hat team. I have been tasked with assisting in the improvement of libvirt documentation where possible and co-ordinating the development of the libvirt Application Development Guide. The guide was previously in the hands of Dani Coulson, who has since left Red Hat. She got it to a draft stage with a skeletal structure, but as far as I can tell nothing ever reached a publishable state. I've picked up where she left off and re-built the latest guide from the DocBook XML in git. It is now up at:
http://libvirt.org/guide/
If you look in the guide, you will notice an awful lot of "TBD" stubs. Contributions to fill these would be greatly appreciated - please email them to me directly. I will chase up with the people who were originally nominated as the responsible parties to try and get some content to flesh out the guide.
I don't have a lot of spare temporal bandwidth at the moment, but if there are any docs-related BZs or libvirt issues, feel free to push them my way and I'll do what I can. I think I've made every mistake possible so far in submitting patches, so I know the process by virtue of what-not-to-do.
Thanks
David
14 years, 10 months
[libvirt] Libvirt/LXC creation example
by Avi Weit
Hi,
I am looking for a simple example for creating a Linux container (LXC) with
a network connection that can be accessed from the outside world. I first
tried to set up a bridge and link it with the interface eth0 configured on
host via: brctl addif virbr0 eth0 but each time I did that - I lost the
connection to the host and could not even ping it. The connection was
resumed only when removing the host interface of eth0 from the bridge:
brctl delif virbr0 eth0
How can I define the network interfaces to be configured inside the
container? How can I setup container IPs?
Any full XML example would be appreciated.
Thanks
- Avi
14 years, 10 months
[libvirt] [PATCH 0/1] Clean up udev init
by David Allan
The following patch contains cleanup for the udev init error handling
and Matthias' fix for the case in which priv is NULL when shutdown is
called.
Dave
14 years, 10 months
[libvirt] [PATCH 0/7] Update hotplug code to use device_add
by Daniel P. Berrange
I previously changed the command line syntax to use -device for starting
guests. The key reason for this was to allow unique names and PCI addresses
to be assigned to all devices. I forgot to do the same for hotplug :-(
This series is not yet complete, but it goes some of the way towards fixing
the hotplug code to use 'device_add' for creating devices. The main remaining
problem is that I'm not assigning aliases to devices, so they all get the name
of '(null)'. Obviously I'll fix that before submitting again...
Daniel
14 years, 10 months
[libvirt] [PATCH] support XEN_SYSCTL_INTERFACE_VERSION
by Jim Fehlig
xen-unstable c/s 20762 bumped XEN_SYSCTL_INTERFACE_VERSION to 7. I
don't see how the interface change affects libvirt, other than failing
xenHypervisorInit() since version 7 is not tried.
The attached patch accommodates the upcoming Xen 4.0 release by checking
for XEN_SYSCTL_INTERFACE_VERSION 7. If found, it sets
XEN_DOMCTL_INTERFACE_VERSION to 6, which is also new to Xen 4.0.
Regards,
Jim
14 years, 10 months
[libvirt] [PATCH 0/1] Clean up udev init
by David Allan
Matthias' patch made me realize that I didn't write the udev init code
as cleanly as I'd like, and the error handling was starting to get a
little messy. The attached patch cleans it up. There should be no
functional change.
Dave
14 years, 10 months
[libvirt] [PATCH] Add missing sata controller type to domain.rng
by Matthew Booth
* docs/schemas/domain.rng: Add sata controller type
---
docs/schemas/domain.rng | 1 +
1 files changed, 1 insertions(+), 0 deletions(-)
diff --git a/docs/schemas/domain.rng b/docs/schemas/domain.rng
index 49b57eb..827ff6f 100644
--- a/docs/schemas/domain.rng
+++ b/docs/schemas/domain.rng
@@ -529,6 +529,7 @@
<value>fdc</value>
<value>ide</value>
<value>scsi</value>
+ <value>sata</value>
</choice>
</attribute>
</optional>
--
1.6.6
14 years, 10 months
[libvirt] [PATCH] portability to non-glibc: don't use realpath(..., NULL)
by Jim Meyering
Using realpath like that is not portable, and providing
a buffer instead of NULL is wasteful and harder to code properly.
Instead, use gnulib's canonicalize_file_name, which does
the same job portably:
From 4afea6b59e2be6c28b45ef59a6a2f896eed44dba Mon Sep 17 00:00:00 2001
From: Jim Meyering <meyering(a)redhat.com>
Date: Tue, 26 Jan 2010 17:13:45 +0100
Subject: [PATCH] portability to non-glibc: don't use realpath(..., NULL)
it causes a NULL-dereference on some systems like Solaris 10.
* src/node_device/node_device_linux_sysfs.c. Include <stdlib.h>.
(get_sriov_function): Use canonicalize_file_name, not realpath.
* bootstrap (modules): Add canonicalize-lgpl.
---
bootstrap | 1 +
src/node_device/node_device_linux_sysfs.c | 6 ++++--
2 files changed, 5 insertions(+), 2 deletions(-)
diff --git a/bootstrap b/bootstrap
index aec5d05..cc3c6ef 100755
--- a/bootstrap
+++ b/bootstrap
@@ -68,6 +68,7 @@ modules='
areadlink
base64
c-ctype
+canonicalize-lgpl
close
connect
getaddrinfo
diff --git a/src/node_device/node_device_linux_sysfs.c b/src/node_device/node_device_linux_sysfs.c
index 33e658d..c1fce5d 100644
--- a/src/node_device/node_device_linux_sysfs.c
+++ b/src/node_device/node_device_linux_sysfs.c
@@ -24,6 +24,7 @@
#include <fcntl.h>
#include <sys/stat.h>
+#include <stdlib.h>
#include "node_device_driver.h"
#include "node_device_hal.h"
@@ -242,7 +243,8 @@ out:
static int get_sriov_function(const char *device_link,
struct pci_config_address **bdf)
{
- char *device_path = NULL, *config_address = NULL;
+ char *config_address = NULL;
+ char *device_path = NULL;
char errbuf[64];
int ret = SRIOV_ERROR;
@@ -259,7 +261,7 @@ static int get_sriov_function(const char *device_link,
}
- device_path = realpath(device_link, device_path);
+ device_path = canonicalize_file_name (device_link);
if (device_path == NULL) {
memset(errbuf, '\0', sizeof(errbuf));
VIR_ERROR("Failed to resolve device link '%s': '%s'", device_link,
--
1.7.0.rc0.140.gfbe7
14 years, 10 months