On 09/20/13 13:56, Daniel P. Berrange wrote:
On Fri, Sep 20, 2013 at 11:07:00AM +0200, Peter Krempa wrote:
> Prefer using VFIO (if available) to the legacy KVM device passthrough.
>
> With this patch a PCI passthrough device without the driver configured
> will be started with VFIO if it's available on the host. If not legacy
> KVM passthrough is checked and error is reported if it's not available.
> ---
> docs/formatdomain.html.in | 9 ++++----
> src/conf/domain_conf.h | 2 +-
> src/qemu/qemu_command.c | 2 +-
> src/qemu/qemu_hotplug.c | 26 +++++++++++++++++++++-
> src/qemu/qemu_process.c | 18 ++++++++++++++-
> .../qemuxml2argv-hostdev-pci-address-device.args | 2 +-
> .../qemuxml2argvdata/qemuxml2argv-net-hostdev.args | 2 +-
> tests/qemuxml2argvdata/qemuxml2argv-pci-rom.args | 4 ++--
> 8 files changed, 52 insertions(+), 13 deletions(-)
>
> diff --git a/docs/formatdomain.html.in b/docs/formatdomain.html.in
> index 3689399..6f3f7cf 100644
> --- a/docs/formatdomain.html.in
> +++ b/docs/formatdomain.html.in
> @@ -2755,11 +2755,10 @@
> backend, which is compatible with UEFI SecureBoot) or "kvm"
> (for the legacy device assignment handled directly by the KVM
> kernel module)<span class="since">Since 1.0.5 (QEMU and KVM
> - only, requires kernel 3.6 or newer)</span>. Currently,
"kvm"
> - is the default used by libvirt when not explicitly provided,
> - but since the two are functionally equivalent, this default
> - could be changed in the future with no impact to domains that
> - don't specify anything.
> + only, requires kernel 3.6 or newer)</span>. The default, when
> + the driver name is not explicitly specified, is to check wether
> + VFIO is available and use it if it's the case. If VFIO is not
> + available, the legacy "kvm" assignment is attempted.
> </dd>
> <dt><code>readonly</code></dt>
> <dd>Indicates that the device is readonly, only supported by SCSI host
> diff --git a/src/conf/domain_conf.h b/src/conf/domain_conf.h
> index 9414ebf..43d8746 100644
> --- a/src/conf/domain_conf.h
> +++ b/src/conf/domain_conf.h
> @@ -399,7 +399,7 @@ enum virDomainHostdevSubsysType {
>
> /* the backend driver used for PCI hostdev devices */
> typedef enum {
> - VIR_DOMAIN_HOSTDEV_PCI_BACKEND_DEFAULT, /* currently kvm, could change */
> + VIR_DOMAIN_HOSTDEV_PCI_BACKEND_DEFAULT, /* detect automaticaly, prefer VFIO */
> VIR_DOMAIN_HOSTDEV_PCI_BACKEND_KVM, /* force legacy kvm style */
> VIR_DOMAIN_HOSTDEV_PCI_BACKEND_VFIO, /* force vfio */
>
> diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c
> index 1411533..0d7c02e 100644
> --- a/src/qemu/qemu_command.c
> +++ b/src/qemu/qemu_command.c
> @@ -5395,13 +5395,13 @@ qemuBuildPCIHostdevDevStr(virDomainDefPtr def,
>
> switch ((virDomainHostdevSubsysPciBackendType)
> dev->source.subsys.u.pci.backend) {
> - case VIR_DOMAIN_HOSTDEV_PCI_BACKEND_DEFAULT:
> case VIR_DOMAIN_HOSTDEV_PCI_BACKEND_KVM:
> virBufferAddLit(&buf, "pci-assign");
> if (configfd && *configfd)
> virBufferAsprintf(&buf, ",configfd=%s", configfd);
> break;
>
> + case VIR_DOMAIN_HOSTDEV_PCI_BACKEND_DEFAULT:
> case VIR_DOMAIN_HOSTDEV_PCI_BACKEND_VFIO:
> virBufferAddLit(&buf, "vfio-pci");
> break;
The code calling this function should have translated 'DEFAULT' into
either 'KVM' or 'VFIO', so if we see 'DEFAULT' here shouldn't
we
raise an error.
If we change this to produce error in case DEFAULT is passed, then all
the tests that are checking correctness of the generated commandline
with passthrough devices will fail or need to be hardcoded with driver
names as the default setting function can't be called in the testsuite.
Do you think it's better to change the tests so that "DEFAULT" is
avoided and error out if default is passed to the command line generator?
> diff --git a/src/qemu/qemu_hotplug.c b/src/qemu/qemu_hotplug.c
> index 9320ab9..5a8fa44 100644
> --- a/src/qemu/qemu_hotplug.c
> +++ b/src/qemu/qemu_hotplug.c
> @@ -1144,6 +1144,31 @@ int qemuDomainAttachHostPciDevice(virQEMUDriverPtr driver,
> return -1;
>
> switch ((virDomainHostdevSubsysPciBackendType) *backend) {
> + case VIR_DOMAIN_HOSTDEV_PCI_BACKEND_DEFAULT:
> + if (qemuHostdevHostSupportsPassthroughVFIO() &&
> + virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_DEVICE_VFIO_PCI)) {
> + *backend = VIR_DOMAIN_HOSTDEV_PCI_BACKEND_VFIO;
> + /* VFIO requires all of the guest's memory to be locked resident.
> + * In this case, the guest's memory may already be locked, but it
> + * doesn't hurt to "change" the limit to the same value.
> + */
> + if (vm->def->mem.hard_limit)
> + virProcessSetMaxMemLock(vm->pid, vm->def->mem.hard_limit);
> + else
> + virProcessSetMaxMemLock(vm->pid,
> + vm->def->mem.max_balloon + (1024 *
1024));
> + } else if (qemuHostdevHostSupportsPassthroughLegacy() &&
> + (virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_PCIDEVICE) ||
> + virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_DEVICE))) {
> + *backend = VIR_DOMAIN_HOSTDEV_PCI_BACKEND_KVM;
> + } else {
> + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
> + _("host doesn't support passthrough of "
> + "host PCI devices"));
> + }
> +
> + break;
> +
> case VIR_DOMAIN_HOSTDEV_PCI_BACKEND_VFIO:
> if (!virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_DEVICE_VFIO_PCI)) {
> virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
> @@ -1170,7 +1195,6 @@ int qemuDomainAttachHostPciDevice(virQEMUDriverPtr driver,
>
> break;
>
> - case VIR_DOMAIN_HOSTDEV_PCI_BACKEND_DEFAULT:
> case VIR_DOMAIN_HOSTDEV_PCI_BACKEND_KVM:
> if (!qemuHostdevHostSupportsPassthroughLegacy()) {
> virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
> diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c
> index e36ab99..9648aa0 100644
> --- a/src/qemu/qemu_process.c
> +++ b/src/qemu/qemu_process.c
> @@ -3730,6 +3730,23 @@ int qemuProcessStart(virConnectPtr conn,
> }
>
> switch ((virDomainHostdevSubsysPciBackendType) *backend) {
> + case VIR_DOMAIN_HOSTDEV_PCI_BACKEND_DEFAULT:
> + if (supportsPassthroughVFIO &&
> + virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_DEVICE_VFIO_PCI)) {
> + *backend = VIR_DOMAIN_HOSTDEV_PCI_BACKEND_VFIO;
> + } else if (supportsPassthroughKVM &&
> + (virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_PCIDEVICE)
||
> + virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_DEVICE))) {
> + *backend = VIR_DOMAIN_HOSTDEV_PCI_BACKEND_KVM;
> + } else {
> + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
> + _("host doesn't support passthrough of
"
> + "host PCI devices"));
> + goto cleanup;
> + }
> +
> + break;
There's alot of duplication between here & the hotplug code - can we get
this logic shared in a helper function ?
> +
> case VIR_DOMAIN_HOSTDEV_PCI_BACKEND_VFIO:
> if (!supportsPassthroughVFIO) {
> virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
> @@ -3738,7 +3755,6 @@ int qemuProcessStart(virConnectPtr conn,
> }
> break;
>
> - case VIR_DOMAIN_HOSTDEV_PCI_BACKEND_DEFAULT:
> case VIR_DOMAIN_HOSTDEV_PCI_BACKEND_KVM:
> if (!supportsPassthroughKVM) {
> virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
> diff --git a/tests/qemuxml2argvdata/qemuxml2argv-hostdev-pci-address-device.args
b/tests/qemuxml2argvdata/qemuxml2argv-hostdev-pci-address-device.args
> index 214b246..557b733 100644
> --- a/tests/qemuxml2argvdata/qemuxml2argv-hostdev-pci-address-device.args
> +++ b/tests/qemuxml2argvdata/qemuxml2argv-hostdev-pci-address-device.args
> @@ -2,5 +2,5 @@ LC_ALL=C PATH=/bin HOME=/home/test USER=test LOGNAME=test
QEMU_AUDIO_DRV=none \
> /usr/bin/qemu -S -M \
> pc -m 214 -smp 1 -nographic -nodefconfig -nodefaults -monitor \
> unix:/tmp/test-monitor,server,nowait -no-acpi -boot c -usb -hda \
> -/dev/HostVG/QEMUGuest2 -device pci-assign,host=06:12.5,id=hostdev0,\
> +/dev/HostVG/QEMUGuest2 -device vfio-pci,host=06:12.5,id=hostdev0,\
> bus=pci.0,addr=0x3 -device virtio-balloon-pci,id=balloon0,bus=pci.0,addr=0x4
> diff --git a/tests/qemuxml2argvdata/qemuxml2argv-net-hostdev.args
b/tests/qemuxml2argvdata/qemuxml2argv-net-hostdev.args
> index 184811b..3a3963c 100644
> --- a/tests/qemuxml2argvdata/qemuxml2argv-net-hostdev.args
> +++ b/tests/qemuxml2argvdata/qemuxml2argv-net-hostdev.args
> @@ -3,5 +3,5 @@ LC_ALL=C PATH=/bin HOME=/home/test USER=test LOGNAME=test
QEMU_AUDIO_DRV=none \
> -M pc -m 214 -smp 1 -nographic -nodefconfig -nodefaults -monitor \
> unix:/tmp/test-monitor,server,nowait -no-acpi -boot c \
> -usb -hda /dev/HostVG/QEMUGuest1 \
> --device pci-assign,host=03:07.1,id=hostdev0,bus=pci.0,addr=0x3 \
> +-device vfio-pci,host=03:07.1,id=hostdev0,bus=pci.0,addr=0x3 \
> -device virtio-balloon-pci,id=balloon0,bus=pci.0,addr=0x4
> diff --git a/tests/qemuxml2argvdata/qemuxml2argv-pci-rom.args
b/tests/qemuxml2argvdata/qemuxml2argv-pci-rom.args
> index c850613..615047a 100644
> --- a/tests/qemuxml2argvdata/qemuxml2argv-pci-rom.args
> +++ b/tests/qemuxml2argvdata/qemuxml2argv-pci-rom.args
> @@ -7,7 +7,7 @@ unix:/tmp/test-monitor,server,nowait -no-acpi -boot c -usb -hda \
> -net user,vlan=0,name=hostnet0 \
> -device virtio-net-pci,vlan=1,id=net1,mac=52:54:00:24:a5:9e,bus=pci.0,addr=0x4,\
> romfile=/etc/fake/bootrom.bin -net user,vlan=1,name=hostnet1 \
> --device pci-assign,host=06:12.5,id=hostdev0,bus=pci.0,addr=0x5,rombar=0 \
> --device pci-assign,host=06:12.6,id=hostdev1,bus=pci.0,addr=0x6,rombar=1,\
> +-device vfio-pci,host=06:12.5,id=hostdev0,bus=pci.0,addr=0x5,rombar=0 \
> +-device vfio-pci,host=06:12.6,id=hostdev1,bus=pci.0,addr=0x6,rombar=1,\
> romfile=/etc/fake/bootrom.bin \
> -device virtio-balloon-pci,id=balloon0,bus=pci.0,addr=0x7
Why is the XML here changing ? Shouldn't the test suite be stable in
what it generates regardless of what the host happens to support ?
The difference comes from the change of the default to VFIO in the
commandline generator function.
Danielx