On 16.05.2013 08:07, Chunyan Liu wrote:
> Write separate module for hostdev passthrough so that it could be used by all
> hypervisor drivers and maintain a global hostdev state.
>
> Signed-off-by: Chunyan Liu <cyliu(a)suse.com>
Some comments inline. Besides that seems to be working (with your next patch
for libxl) :)
> ---
> po/POTFILES.in | 1 +
> src/Makefile.am | 1 +
> src/libvirt.c | 5 +
> src/libvirt_private.syms | 15 +
> src/util/virhostdevmanager.c | 1218 ++++++++++++++++++++++++++++++++++++++++++
> src/util/virhostdevmanager.h | 91 ++++
> src/util/virpci.c | 17 +-
> src/util/virpci.h | 7 +-
> src/util/virusb.c | 19 +-
> src/util/virusb.h | 4 +-
> 10 files changed, 1362 insertions(+), 16 deletions(-)
> create mode 100644 src/util/virhostdevmanager.c
> create mode 100644 src/util/virhostdevmanager.h
>
> diff --git a/po/POTFILES.in b/po/POTFILES.in
> index f3ea4da..a7c21bf 100644
> --- a/po/POTFILES.in
> +++ b/po/POTFILES.in
> @@ -188,6 +188,7 @@ src/util/viruri.c
> src/util/virusb.c
> src/util/virutil.c
> src/util/virxml.c
> +src/util/virhostdevmanager.c
> src/vbox/vbox_MSCOMGlue.c
> src/vbox/vbox_XPCOMCGlue.c
> src/vbox/vbox_driver.c
> diff --git a/src/Makefile.am b/src/Makefile.am
> index 4312c3c..a197c2b 100644
> --- a/src/Makefile.am
> +++ b/src/Makefile.am
> @@ -130,6 +130,7 @@ UTIL_SOURCES = \
> util/virutil.c util/virutil.h \
> util/viruuid.c util/viruuid.h \
> util/virxml.c util/virxml.h \
> + util/virhostdevmanager.c util/virhostdevmanager.h \
> $(NULL)
>
>
> diff --git a/src/libvirt.c b/src/libvirt.c
> index 2b3515e..d9af5a6 100644
> --- a/src/libvirt.c
> +++ b/src/libvirt.c
> @@ -65,6 +65,7 @@
> #include "virthread.h"
> #include "virstring.h"
> #include "virutil.h"
> +#include "virhostdevmanager.h"
>
> #ifdef WITH_TEST
> # include "test/test_driver.h"
> @@ -827,6 +828,7 @@ int virStateInitialize(bool privileged,
> if (virInitialize() < 0)
> return -1;
>
> + virHostdevManagerInit();
> for (i = 0 ; i < virStateDriverTabCount ; i++) {
> if (virStateDriverTab[i]->stateInitialize) {
> VIR_DEBUG("Running global init for %s state driver",
> @@ -858,6 +860,9 @@ int virStateCleanup(void) {
> virStateDriverTab[i]->stateCleanup() < 0)
> ret = -1;
> }
> +
> + virHostdevManagerCleanup();
> +
> return ret;
> }
>
> diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms
> index cdd0b41..824de4e 100644
> --- a/src/libvirt_private.syms
> +++ b/src/libvirt_private.syms
> @@ -1991,6 +1991,21 @@ virXPathULong;
> virXPathULongHex;
> virXPathULongLong;
>
> +#util/virhostdevmanager.h
> +virHostdevManagerGetDefault;
> +virHostdevManagerInit;
> +virHostdevManagerCleanup;
> +virHostdevManagerPrepareHostdevs;
> +virHostdevManagerReAttachHostdevs;
> +virHostdevManagerPreparePciHostdevs;
> +virHostdevManagerPrepareUsbHostdevs;
> +virHostdevManagerReAttachPciHostdevs;
> +virHostdevManagerReAttachUsbHostdevs;
> +virGetActivePciHostdevs;
> +virGetActiveUsbHostdevs;
> +virGetDomainActivePciHostdevs;
> +virGetDomainActiveUsbHostdevs;
> +virFreeHostdevNameList;
"make check" reports wrong symbol order here.
>
> # Let emacs know we want case-insensitive sorting
> # Local Variables:
> diff --git a/src/util/virhostdevmanager.c b/src/util/virhostdevmanager.c
> new file mode 100644
> index 0000000..9034212
> --- /dev/null
> +++ b/src/util/virhostdevmanager.c
> @@ -0,0 +1,1218 @@
> +/*
> + * Copyright (C) 2013 SUSE LINUX Products GmbH, Nuernberg, Germany.
> + *
> + * This library is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * This library is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with this library. If not, see
> + * <
http://www.gnu.org/licenses/>.
> + *
> + * Author: Chunyan Liu <cyliu(a)suse.com>
> + */
> +#include <config.h>
> +
> +#include "virhostdevmanager.h"
> +
> +#include <sys/types.h>
> +#include <sys/stat.h>
> +#include <unistd.h>
> +#include <stdlib.h>
> +#include <stdio.h>
> +
> +#include "viralloc.h"
> +#include "virstring.h"
> +#include "virfile.h"
> +#include "virerror.h"
> +#include "virlog.h"
> +#include "virpci.h"
> +#include "virusb.h"
> +#include "virnetdev.h"
> +
> +/* For virReportOOMError() and virReportSystemError() */
> +#define VIR_FROM_THIS VIR_FROM_NONE
> +
> +struct _virHostdevManager{
> + char *stateDir;
> +
> + virPCIDeviceListPtr activePciHostdevs;
> + virPCIDeviceListPtr inactivePciHostdevs;
> + virUSBDeviceListPtr activeUsbHostdevs;
> +};
> +
> +static virHostdevManagerPtr hostdevMgr;
> +
> +int virHostdevManagerInit(void)
> +{
> + char ebuf[1024];
> + if (VIR_ALLOC(hostdevMgr) < 0)
> + return -1;
> +
> + if ((hostdevMgr->activePciHostdevs = virPCIDeviceListNew()) == NULL)
> + return -1;
goto out_of_memory ?
> +
> + if ((hostdevMgr->activeUsbHostdevs = virUSBDeviceListNew()) == NULL)
> + return -1;
goto out_of_memory ?
> +
> + if ((hostdevMgr->inactivePciHostdevs = virPCIDeviceListNew()) == NULL)
> + return -1;
goto out_of_memory ?
> +
> + if (virAsprintf(&hostdevMgr->stateDir,
> + "%s",HOSTDEV_STATE_DIR) == -1)
> + goto out_of_memory;
> +
> + if (virFileMakePath(hostdevMgr->stateDir) < 0) {
> + VIR_ERROR(_("Failed to create state dir '%s': %s"),
> + hostdevMgr->stateDir, virStrerror(errno, ebuf,
sizeof(ebuf)));
> + goto error;
> + }
> +
> + return 0;
> +
> +out_of_memory:
> + virReportOOMError();
> +error:
> + return -1;
> +}
> +
> +void virHostdevManagerCleanup(void)
> +{
> + if(!hostdevMgr)
> + return;
> +
> + virObjectUnref(hostdevMgr->activePciHostdevs);
> + virObjectUnref(hostdevMgr->inactivePciHostdevs);
> + virObjectUnref(hostdevMgr->activeUsbHostdevs);
> +
> + VIR_FREE(hostdevMgr->stateDir);
> +}
> +
> +virHostdevManagerPtr virHostdevManagerGetDefault(void)
> +{
> + return hostdevMgr;
> +}
> +
> +static int
> +virDomainHostdevPciSysfsPath(virDomainHostdevDefPtr hostdev, char **sysfs_path)
> +{
> + virPCIDeviceAddress config_address;
> +
> + config_address.domain = hostdev->source.subsys.u.pci.addr.domain;
> + config_address.bus = hostdev->source.subsys.u.pci.addr.bus;
> + config_address.slot = hostdev->source.subsys.u.pci.addr.slot;
> + config_address.function = hostdev->source.subsys.u.pci.addr.function;
> +
> + return virPCIDeviceAddressGetSysfsFile(&config_address, sysfs_path);
> +}
> +
> +static int
> +virDomainHostdevIsVirtualFunction(virDomainHostdevDefPtr hostdev)
> +{
> + char *sysfs_path = NULL;
> + int ret = -1;
> +
> + if (virDomainHostdevPciSysfsPath(hostdev, &sysfs_path) < 0)
> + return ret;
> +
> + ret = virPCIIsVirtualFunction(sysfs_path);
> +
> + VIR_FREE(sysfs_path);
> +
> + return ret;
> +}
> +
> +static int
> +virDomainHostdevNetDevice(virDomainHostdevDefPtr hostdev, char **linkdev,
> + int *vf)
> +{
> + int ret = -1;
> + char *sysfs_path = NULL;
> +
> + if (virDomainHostdevPciSysfsPath(hostdev, &sysfs_path) < 0)
> + return ret;
> +
> + if (virPCIIsVirtualFunction(sysfs_path) == 1) {
> + if (virPCIGetVirtualFunctionInfo(sysfs_path, linkdev,
> + vf) < 0)
> + goto cleanup;
> + } else {
> + if (virPCIGetNetName(sysfs_path, linkdev) < 0)
> + goto cleanup;
> + *vf = -1;
> + }
> +
> + ret = 0;
> +
> +cleanup:
> + VIR_FREE(sysfs_path);
> +
> + return ret;
> +}
> +
> +static int
> +virDomainHostdevNetConfigVirtPortProfile(const char *linkdev, int vf,
> + virNetDevVPortProfilePtr virtPort,
> + const virMacAddrPtr macaddr,
> + const unsigned char *uuid,
> + int associate)
> +{
> + int ret = -1;
> +
> + if (!virtPort)
> + return ret;
> +
> + switch (virtPort->virtPortType) {
> + case VIR_NETDEV_VPORT_PROFILE_NONE:
> + case VIR_NETDEV_VPORT_PROFILE_OPENVSWITCH:
> + case VIR_NETDEV_VPORT_PROFILE_8021QBG:
> + case VIR_NETDEV_VPORT_PROFILE_LAST:
> + virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
> + _("virtualport type %s is "
> + "currently not supported on interfaces of type "
> + "hostdev"),
> + virNetDevVPortTypeToString(virtPort->virtPortType));
> + break;
> +
> + case VIR_NETDEV_VPORT_PROFILE_8021QBH:
> + if (associate)
> + ret = virNetDevVPortProfileAssociate(NULL, virtPort, macaddr,
> + linkdev, vf, uuid,
> + VIR_NETDEV_VPORT_PROFILE_OP_CREATE,
false);
> + else
> + ret = virNetDevVPortProfileDisassociate(NULL, virtPort,
> + macaddr, linkdev, vf,
> +
VIR_NETDEV_VPORT_PROFILE_OP_DESTROY);
> + break;
> + }
> +
> + return ret;
> +}
> +
> +static int
> +virDomainHostdevNetConfigReplace(virDomainHostdevDefPtr hostdev,
> + const unsigned char *uuid,
> + char *stateDir)
> +{
> + char *linkdev = NULL;
> + virNetDevVlanPtr vlan;
> + virNetDevVPortProfilePtr virtPort;
> + int ret = -1;
> + int vf = -1;
> + int vlanid = -1;
> + int port_profile_associate = 1;
> + int isvf;
> +
> + isvf = virDomainHostdevIsVirtualFunction(hostdev);
> + if (isvf <= 0) {
> + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
> + _("Interface type hostdev is currently supported
on"
> + " SR-IOV Virtual Functions only"));
> + return ret;
> + }
> +
> + if (virDomainHostdevNetDevice(hostdev, &linkdev, &vf) < 0)
> + return ret;
> +
> + vlan = virDomainNetGetActualVlan(hostdev->parent.data.net);
> + virtPort = virDomainNetGetActualVirtPortProfile(
> +
hostdev->parent.data.net);
> + if (virtPort) {
> + if (vlan) {
> + virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
> + _("direct setting of the vlan tag is not allowed
"
> + "for hostdev devices using %s mode"),
> + virNetDevVPortTypeToString(virtPort->virtPortType));
> + goto cleanup;
> + }
> + ret = virDomainHostdevNetConfigVirtPortProfile(linkdev, vf,
> + virtPort, &hostdev->parent.data.net->mac,
uuid,
> + port_profile_associate);
> + } else {
> + /* Set only mac and vlan */
> + if (vlan) {
> + if (vlan->nTags != 1 || vlan->trunk) {
> + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
> + _("vlan trunking is not supported "
> + "by SR-IOV network devices"));
> + goto cleanup;
> + }
> + if (vf == -1) {
> + virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
> + _("vlan can only be set for SR-IOV VFs, but
"
> + "%s is not a VF"), linkdev);
> + goto cleanup;
> + }
> + vlanid = vlan->tag[0];
> + } else if (vf >= 0) {
> + vlanid = 0; /* assure any current vlan tag is reset */
> + }
> +
> + ret = virNetDevReplaceNetConfig(linkdev, vf,
> + &hostdev->parent.data.net->mac,
> + vlanid, stateDir);
> + }
> +cleanup:
> + VIR_FREE(linkdev);
> + return ret;
> +}
> +
> +static int
> +virDomainHostdevNetConfigRestore(virDomainHostdevDefPtr hostdev,
> + char *stateDir)
> +{
> + char *linkdev = NULL;
> + virNetDevVPortProfilePtr virtPort;
> + int ret = -1;
> + int vf = -1;
> + int port_profile_associate = 0;
> + int isvf;
> +
> + isvf = virDomainHostdevIsVirtualFunction(hostdev);
> + if (isvf <= 0) {
> + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
> + _("Interface type hostdev is currently supported
on"
> + " SR-IOV Virtual Functions only"));
> + return ret;
> + }
> +
> + if (virDomainHostdevNetDevice(hostdev, &linkdev, &vf) < 0)
> + return ret;
> +
> + virtPort = virDomainNetGetActualVirtPortProfile(
> +
hostdev->parent.data.net);
> + if (virtPort)
> + ret = virDomainHostdevNetConfigVirtPortProfile(linkdev, vf, virtPort,
> + &hostdev->parent.data.net->mac,
NULL,
> + port_profile_associate);
> + else
> + ret = virNetDevRestoreNetConfig(linkdev, vf, stateDir);
> +
> + VIR_FREE(linkdev);
> +
> + return ret;
> +}
> +
> +static virPCIDeviceListPtr
> +virHostdevManagerGetPciHostDeviceList(
> + virDomainHostdevDefPtr *hostdevs,
> + int nhostdevs)
> +{
> + virPCIDeviceListPtr list;
> + int i;
> +
> + if (!(list = virPCIDeviceListNew()))
> + return NULL;
> +
> + for (i = 0 ; i < nhostdevs ; i++) {
> + virDomainHostdevDefPtr hostdev = hostdevs[i];
> + virPCIDevicePtr dev;
> +
> + if (hostdev->mode != VIR_DOMAIN_HOSTDEV_MODE_SUBSYS)
> + continue;
> + if (hostdev->source.subsys.type != VIR_DOMAIN_HOSTDEV_SUBSYS_TYPE_PCI)
> + continue;
> +
> + dev = virPCIDeviceNew(hostdev->source.subsys.u.pci.addr.domain,
> + hostdev->source.subsys.u.pci.addr.bus,
> + hostdev->source.subsys.u.pci.addr.slot,
> + hostdev->source.subsys.u.pci.addr.function);
> + if (!dev) {
> + virObjectUnref(list);
> + return NULL;
> + }
> +
> + if (virPCIDeviceListAdd(list, dev) < 0) {
> + virPCIDeviceFree(dev);
> + virObjectUnref(list);
> + return NULL;
> + }
> +
> + virPCIDeviceSetManaged(dev, hostdev->managed);
> + }
> +
> + return list;
> +}
> +
> +int
> +virHostdevManagerPreparePciHostdevs(virHostdevManagerPtr mgr,
> + const char *drv_name,
> + const char *dom_name,
> + const unsigned char *uuid,
> + virDomainHostdevDefPtr *hostdevs,
> + int nhostdevs,
> + unsigned int flags)
> +{
> + virPCIDeviceListPtr pcidevs;
> + int last_processed_hostdev_vf = -1;
> + int i;
> + int ret = -1;
> +
> + virObjectLock(mgr->activePciHostdevs);
> + virObjectLock(mgr->inactivePciHostdevs);
> +
> + if (!(pcidevs = virHostdevManagerGetPciHostDeviceList(hostdevs, nhostdevs)))
> + goto cleanup;
> +
> + /* We have to use 9 loops here. *All* devices must
> + * be detached before we reset any of them, because
> + * in some cases you have to reset the whole PCI,
> + * which impacts all devices on it. Also, all devices
> + * must be reset before being marked as active.
> + */
> +
> + /* Loop 1: validate that non-managed device isn't in use, eg
> + * by checking that device is either un-bound, or bound
> + * to pci-stub.ko
> + */
> +
> + for (i = 0; i < virPCIDeviceListCount(pcidevs); i++) {
> + virPCIDevicePtr dev = virPCIDeviceListGet(pcidevs, i);
> + virPCIDevicePtr other;
> + bool strict_acs_check = (flags & VIR_STRICT_ACS_CHECK)?true:false;
> +
> + if (!virPCIDeviceIsAssignable(dev, strict_acs_check)) {
> + virReportError(VIR_ERR_OPERATION_INVALID,
> + _("PCI device %s is not assignable"),
> + virPCIDeviceGetName(dev));
> + goto cleanup;
> + }
> + /* The device is in use by other active domain if
> + * the dev is in list activePciHostdevs.
> + */
> + if ((other = virPCIDeviceListFind(mgr->activePciHostdevs, dev))) {
> + char *other_drvname = NULL;
> + char *other_domname = NULL;
> + virPCIDeviceGetUsedBy(other, &other_drvname, &other_domname);
> +
> + if (other_drvname && other_domname)
> + virReportError(VIR_ERR_OPERATION_INVALID,
> + _("PCI device %s is in use by driver %s,domain
%s"),
> + virPCIDeviceGetName(dev), other_drvname,
> + other_domname);
> + else
> + virReportError(VIR_ERR_OPERATION_INVALID,
> + _("PCI device %s is already in use"),
> + virPCIDeviceGetName(dev));
> + VIR_FREE(other_drvname);
> + VIR_FREE(other_domname);
> + goto cleanup;
> + }
> + }
> +
> + /* Loop 2: detach managed devices */
> + for (i = 0; i < virPCIDeviceListCount(pcidevs); i++) {
> + virPCIDevicePtr dev = virPCIDeviceListGet(pcidevs, i);
> + const char *stub_driver;
> + if (STREQ_NULLABLE(drv_name, "xenlight"))
> + stub_driver = "pciback";
> + else
> + stub_driver = "pci_stub";
Really need hardcode driver name here?
Actually, no, it's not needed, and the code this is replacing has
changed, so this all needs a rebase.
The way that it works now (with the code that's in qemu) is that
qemuGetPciHostDeviceList() (which builds a list of virPCIDevice from a
list of virDomainHostDevDef) checks for a <driver name='xxx'/> setting,
and if driver name is set to "vfio", it calls
virPCIDeviceSetStubDriver(dev, "vfio-pci"), otherwise it calls
virPCIDeviceSetStubDriver(dev, "pci-stub"). I guess your new
virHostdevManagerGetPciHostDeviceList() needs to be made aware of which
hypervisor is being used, and set the stub driver accordingly.
Everything else in the device detach/reattach should be made to either
use the stub driver name stored in the virPCIDevice, or to auto-detect
it (virPCIDeviceUnbindFromStub() shows how to do that).
BTW "xen" driver missing here.
Perhaps this can be passes as parameter (with default to "pci_stub" if NULL)?
Or set by driver before calling this function (virPCIDeviceSetSubDriver?).
Right, that function is newer than the code this patch is based from.
[...]
> +
> +/*
> + * Pre-condition: mgr->inactivePciHostdevs & mgr->activePciHostdevs
> + * are locked
> + */
> +static void
> +virReattachPciDevice(virHostdevManagerPtr mgr, virPCIDevicePtr dev, const char
*driver)
> +{
> + /* If the device is not managed and was attached to guest
> + * successfully, it must have been inactive.
> + */
> + if (!virPCIDeviceGetManaged(dev)) {
> + if (virPCIDeviceListAdd(mgr->inactivePciHostdevs, dev) < 0)
> + virPCIDeviceFree(dev);
> + return;
> + }
> +
> + if (STREQ_NULLABLE(driver, "QEMU")) {
Same as earlier, perhaps the better approach will be checking for current stub
driver here?
Hmm, yes. I wonder what this does for vfio. Is this needed for something
in the pci-stub driver? Or is it common to any device assignment in
qemu? (Fortunately it's harmless (although a bit wasteful of CPU time)
when the device hasn't been used by qemu/kvm.
> + int retries = 100;
> +
> + while (virPCIDeviceWaitForCleanup(dev, "kvm_assigned_device")
> + && retries) {
> + usleep(100*1000);
> + retries--;
> + }
> + }
> +
> + if (virPCIDeviceReattach(dev, mgr->activePciHostdevs,
> + mgr->inactivePciHostdevs) < 0) {
> + virErrorPtr err = virGetLastError();
> + VIR_ERROR(_("Failed to re-attach PCI device: %s"),
> + err ? err->message : _("unknown error"));
> + virResetError(err);
> + }
> + virPCIDeviceFree(dev);
> +}
> +
> +/*
>
Generally, please check for recent changes to the functions you're
basing this on. We don't want any of that to mysteriously disappear when
we switch from the qemu-specific functions to these general functions.