This module will be used by virHostdevManager and is inspired
by the virPCIDevice module. The two are very similar, except for
what identifies an NVMe device: the PCI address AND the namespace
ID. This means that an NVMe device can appear in a domain multiple
times, each time with a different namespace.
Signed-off-by: Michal Privoznik <mprivozn(a)redhat.com>
---
src/libvirt_private.syms | 18 ++
src/util/Makefile.inc.am | 2 +
src/util/virnvme.c | 454 +++++++++++++++++++++++++++++++++++++++
src/util/virnvme.h | 95 ++++++++
4 files changed, 569 insertions(+)
create mode 100644 src/util/virnvme.c
create mode 100644 src/util/virnvme.h
diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms
index d383dbe929..0c84b347db 100644
--- a/src/libvirt_private.syms
+++ b/src/libvirt_private.syms
@@ -2629,6 +2629,24 @@ virNumaSetPagePoolSize;
virNumaSetupMemoryPolicy;
+# util/virnvme.h
+virNVMeDeviceAddressGet;
+virNVMeDeviceCopy;
+virNVMeDeviceFree;
+virNVMeDeviceListAdd;
+virNVMeDeviceListCount;
+virNVMeDeviceListCreateDetachList;
+virNVMeDeviceListDel;
+virNVMeDeviceListGet;
+virNVMeDeviceListLookup;
+virNVMeDeviceListLookupIndex;
+virNVMeDeviceListNew;
+virNVMeDeviceNew;
+virNVMeDeviceUsedByClear;
+virNVMeDeviceUsedByGet;
+virNVMeDeviceUsedBySet;
+
+
# util/virobject.h
virClassForObject;
virClassForObjectLockable;
diff --git a/src/util/Makefile.inc.am b/src/util/Makefile.inc.am
index 482b657a90..7e677b891c 100644
--- a/src/util/Makefile.inc.am
+++ b/src/util/Makefile.inc.am
@@ -145,6 +145,8 @@ UTIL_SOURCES = \
util/virnetlink.h \
util/virnodesuspend.c \
util/virnodesuspend.h \
+ util/virnvme.c \
+ util/virnvme.h \
util/virkmod.c \
util/virkmod.h \
util/virnuma.c \
diff --git a/src/util/virnvme.c b/src/util/virnvme.c
new file mode 100644
index 0000000000..f52955c615
--- /dev/null
+++ b/src/util/virnvme.c
@@ -0,0 +1,454 @@
+/*
+ * virnvme.c: helper APIs for managing NVMe devices
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see
+ * <http://www.gnu.org/licenses/>.
+ */
+
+#include <config.h>
+
+#include "virnvme.h"
+#include "virobject.h"
+#include "virpci.h"
+#include "viralloc.h"
+#include "virlog.h"
+#include "virstring.h"
+
+VIR_LOG_INIT("util.nvme");
+#define VIR_FROM_THIS VIR_FROM_NONE
+
+/* A single NVMe namespace. Two entries are considered the same
+ * device only if both the PCI address and the namespace ID match
+ * (see virNVMeDeviceListLookupIndex()). */
+struct _virNVMeDevice {
+ virPCIDeviceAddress address; /* PCI address of controller */
+ unsigned int namespace; /* Namespace ID */
+ bool managed; /* forwarded to virPCIDeviceSetManaged() when a PCI device is created */
+
+ /* "Used by" information, set via virNVMeDeviceUsedBySet() and
+ * released by virNVMeDeviceUsedByClear(). Both may be NULL. */
+ char *drvname;
+ char *domname;
+};
+
+
+/* Lockable object holding an array of NVMe devices. The list owns
+ * its elements; they are freed in virNVMeDeviceListDispose(). */
+struct _virNVMeDeviceList {
+ virObjectLockable parent;
+
+ size_t count; /* number of valid entries in @devs */
+ virNVMeDevicePtr *devs; /* array of @count device pointers */
+};
+
+
+static virClassPtr virNVMeDeviceListClass;
+
+static void virNVMeDeviceListDispose(void *obj);
+
+/* One-time initializer: registers the virNVMeDeviceList class as a
+ * child of the lockable object class.
+ *
+ * Returns: 0 on success, -1 if the class could not be created. */
+static int
+virNVMeOnceInit(void)
+{
+    if (VIR_CLASS_NEW(virNVMeDeviceList, virClassForObjectLockable()))
+        return 0;
+
+    return -1;
+}
+
+VIR_ONCE_GLOBAL_INIT(virNVMe);
+
+
+/**
+ * virNVMeDeviceNew:
+ * @address: PCI address of the NVMe controller
+ * @namespace: namespace ID (stored internally as unsigned int)
+ * @managed: managed flag to remember for this device
+ *
+ * Allocates a new NVMe device description. The "used by" strings
+ * start out unset.
+ *
+ * Returns: the new device (free with virNVMeDeviceFree()),
+ *          NULL if the allocation failed.
+ */
+virNVMeDevicePtr
+virNVMeDeviceNew(const virPCIDeviceAddress *address,
+                 unsigned long namespace,
+                 bool managed)
+{
+    VIR_AUTOPTR(virNVMeDevice) nvme = NULL;
+
+    if (VIR_ALLOC(nvme) < 0)
+        return NULL;
+
+    nvme->managed = managed;
+    nvme->namespace = namespace;
+    virPCIDeviceAddressCopy(&nvme->address, address);
+
+    VIR_RETURN_PTR(nvme);
+}
+
+
+/**
+ * virNVMeDeviceFree:
+ * @dev: device to free, may be NULL
+ *
+ * Releases the "used by" strings and then the device itself.
+ * Passing NULL is a no-op.
+ */
+void
+virNVMeDeviceFree(virNVMeDevicePtr dev)
+{
+    if (dev) {
+        virNVMeDeviceUsedByClear(dev);
+        VIR_FREE(dev);
+    }
+}
+
+
+/**
+ * virNVMeDeviceCopy:
+ * @dev: NVMe device to duplicate
+ *
+ * Creates a deep copy of @dev: the PCI address, namespace ID and
+ * managed flag are copied and both "used by" strings are
+ * duplicated.
+ *
+ * Returns: the newly allocated copy (free with virNVMeDeviceFree()),
+ *          NULL if an allocation failed.
+ */
+virNVMeDevicePtr
+virNVMeDeviceCopy(const virNVMeDevice *dev)
+{
+    VIR_AUTOPTR(virNVMeDevice) copy = NULL;
+
+    /* On any failure @copy is released automatically, including
+     * whichever string was already duplicated. */
+    if (VIR_ALLOC(copy) < 0 ||
+        VIR_STRDUP(copy->drvname, dev->drvname) < 0 ||
+        VIR_STRDUP(copy->domname, dev->domname) < 0)
+        return NULL;
+
+    virPCIDeviceAddressCopy(&copy->address, &dev->address);
+    copy->namespace = dev->namespace;
+    copy->managed = dev->managed;
+
+    VIR_RETURN_PTR(copy);
+}
+
+
+/**
+ * virNVMeDeviceAddressGet:
+ * @dev: NVMe device
+ *
+ * Returns: pointer to the PCI address stored inside @dev. The
+ *          pointer refers to @dev's own storage and must not be
+ *          freed by the caller.
+ */
+const virPCIDeviceAddress *
+virNVMeDeviceAddressGet(const virNVMeDevice *dev)
+{
+    const virPCIDeviceAddress *addr = &dev->address;
+
+    return addr;
+}
+
+
+/**
+ * virNVMeDeviceUsedByClear:
+ * @dev: NVMe device
+ *
+ * Frees the driver and domain names recorded in @dev, leaving the
+ * device marked as unused.
+ */
+void
+virNVMeDeviceUsedByClear(virNVMeDevicePtr dev)
+{
+    VIR_FREE(dev->domname);
+    VIR_FREE(dev->drvname);
+}
+
+
+/**
+ * virNVMeDeviceUsedByGet:
+ * @dev: NVMe device
+ * @drv: filled with the recorded driver name
+ * @dom: filled with the recorded domain name
+ *
+ * Hands out the strings stored in @dev without copying them, so
+ * the caller must not free them. Either may be NULL if unset.
+ */
+void
+virNVMeDeviceUsedByGet(const virNVMeDevice *dev,
+                       const char **drv,
+                       const char **dom)
+{
+    *dom = dev->domname;
+    *drv = dev->drvname;
+}
+
+
+/**
+ * virNVMeDeviceUsedBySet:
+ * @dev: NVMe device
+ * @drv: name of the driver using the device
+ * @dom: name of the domain using the device
+ *
+ * Records who is using @dev. Both strings are duplicated; any
+ * previously recorded values are released first.
+ *
+ * Returns: 0 on success,
+ *          -1 on allocation failure (both strings are left unset).
+ */
+int
+virNVMeDeviceUsedBySet(virNVMeDevicePtr dev,
+                       const char *drv,
+                       const char *dom)
+{
+    /* Drop any previous values so that re-marking a device does not
+     * leak the old strings. */
+    virNVMeDeviceUsedByClear(dev);
+
+    if (VIR_STRDUP(dev->drvname, drv) < 0 ||
+        VIR_STRDUP(dev->domname, dom) < 0) {
+        virNVMeDeviceUsedByClear(dev);
+        return -1;
+    }
+
+    return 0;
+}
+
+
+/**
+ * virNVMeDeviceListNew:
+ *
+ * Creates an empty, lockable list of NVMe devices, initializing
+ * the list class first if necessary.
+ *
+ * Returns: the new list object, NULL on failure.
+ */
+virNVMeDeviceListPtr
+virNVMeDeviceListNew(void)
+{
+    if (virNVMeInitialize() < 0)
+        return NULL;
+
+    return virObjectLockableNew(virNVMeDeviceListClass);
+}
+
+
+/* Dispose callback of the virNVMeDeviceList class: frees every
+ * device held by the list and then the array itself. */
+static void
+virNVMeDeviceListDispose(void *obj)
+{
+    virNVMeDeviceListPtr list = obj;
+    size_t idx = 0;
+
+    while (idx < list->count) {
+        virNVMeDeviceFree(list->devs[idx]);
+        idx++;
+    }
+
+    VIR_FREE(list->devs);
+}
+
+
+/**
+ * virNVMeDeviceListCount:
+ * @list: list of NVMe devices
+ *
+ * Returns: number of devices currently on @list.
+ */
+size_t
+virNVMeDeviceListCount(const virNVMeDeviceList *list)
+{
+    const size_t ndevs = list->count;
+
+    return ndevs;
+}
+
+
+/**
+ * virNVMeDeviceListAdd:
+ * @list: list of NVMe devices
+ * @dev: device to add
+ *
+ * Appends a copy of @dev to @list; the caller keeps ownership of
+ * @dev. A device with the same PCI address and namespace ID must
+ * not already be on the list.
+ *
+ * Returns: 0 on success,
+ *          -1 if the device is already listed (error reported) or
+ *          if copying/appending failed.
+ */
+int
+virNVMeDeviceListAdd(virNVMeDeviceListPtr list,
+                     const virNVMeDevice *dev)
+{
+    virNVMeDevicePtr tmp;
+
+    if ((tmp = virNVMeDeviceListLookup(list, dev))) {
+        VIR_AUTOFREE(char *) addrStr = virPCIDeviceAddressAsString(&tmp->address);
+        virReportError(VIR_ERR_INTERNAL_ERROR,
+                       _("NVMe device %s namespace %u is already on the list"),
+                       NULLSTR(addrStr), tmp->namespace);
+        return -1;
+    }
+
+    /* If the copy failed, @tmp is NULL and virNVMeDeviceFree() is a
+     * no-op; if the append failed, the copy is released here. */
+    if (!(tmp = virNVMeDeviceCopy(dev)) ||
+        VIR_APPEND_ELEMENT(list->devs, list->count, tmp) < 0) {
+        virNVMeDeviceFree(tmp);
+        return -1;
+    }
+
+    return 0;
+}
+
+
+/**
+ * virNVMeDeviceListDel:
+ * @list: list of NVMe devices
+ * @dev: device to remove (matched by PCI address and namespace ID)
+ *
+ * Removes the matching entry from @list and frees it.
+ *
+ * Returns: 0 on success,
+ *          -1 if no matching device is on the list (error reported).
+ */
+int
+virNVMeDeviceListDel(virNVMeDeviceListPtr list,
+                     const virNVMeDevice *dev)
+{
+    ssize_t idx;
+    virNVMeDevicePtr tmp = NULL;
+
+    if ((idx = virNVMeDeviceListLookupIndex(list, dev)) < 0) {
+        VIR_AUTOFREE(char *) addrStr = virPCIDeviceAddressAsString(&dev->address);
+        virReportError(VIR_ERR_INTERNAL_ERROR,
+                       _("NVMe device %s namespace %u not found"),
+                       NULLSTR(addrStr), dev->namespace);
+        return -1;
+    }
+
+    /* Remember the element before it is dropped from the array so
+     * that it can be freed afterwards. */
+    tmp = list->devs[idx];
+    VIR_DELETE_ELEMENT(list->devs, idx, list->count);
+    virNVMeDeviceFree(tmp);
+    return 0;
+}
+
+
+/**
+ * virNVMeDeviceListGet:
+ * @list: list of NVMe devices
+ * @i: index of the wanted device
+ *
+ * Returns: the device stored at position @i (still owned by
+ *          @list), NULL if @i is out of range.
+ */
+virNVMeDevicePtr
+virNVMeDeviceListGet(virNVMeDeviceListPtr list,
+                     size_t i)
+{
+    if (i >= list->count)
+        return NULL;
+
+    return list->devs[i];
+}
+
+
+/**
+ * virNVMeDeviceListLookup:
+ * @list: list of NVMe devices, may be NULL
+ * @dev: device to look for (matched by PCI address and namespace ID)
+ *
+ * Returns: the matching entry of @list (still owned by @list),
+ *          NULL if there is none.
+ */
+virNVMeDevicePtr
+virNVMeDeviceListLookup(virNVMeDeviceListPtr list,
+                        const virNVMeDevice *dev)
+{
+    ssize_t pos = virNVMeDeviceListLookupIndex(list, dev);
+
+    return pos < 0 ? NULL : list->devs[pos];
+}
+
+
+/**
+ * virNVMeDeviceListLookupIndex:
+ * @list: list of NVMe devices, may be NULL
+ * @dev: device to look for
+ *
+ * Searches @list for an entry that has both the same PCI address
+ * and the same namespace ID as @dev.
+ *
+ * Returns: index of the first match,
+ *          -1 if @list is NULL or no entry matches.
+ */
+ssize_t
+virNVMeDeviceListLookupIndex(virNVMeDeviceListPtr list,
+                             const virNVMeDevice *dev)
+{
+    size_t i;
+
+    if (!list)
+        return -1;
+
+    for (i = 0; i < list->count; i++) {
+        virNVMeDevicePtr other = list->devs[i];
+
+        if (virPCIDeviceAddressEqual(&dev->address, &other->address) &&
+            dev->namespace == other->namespace)
+            return i;
+    }
+
+    return -1;
+}
+
+
+/* Finds the first device on @list whose PCI address equals
+ * @address, regardless of its namespace ID.
+ *
+ * Returns: the matching entry (owned by @list), or NULL if @list
+ * is NULL or nothing matches. */
+static virNVMeDevicePtr
+virNVMeDeviceListLookupByPCIAddress(virNVMeDeviceListPtr list,
+                                    const virPCIDeviceAddress *address)
+{
+    size_t idx;
+
+    if (list == NULL)
+        return NULL;
+
+    for (idx = 0; idx < list->count; idx++) {
+        if (virPCIDeviceAddressEqual(address, &list->devs[idx]->address))
+            return list->devs[idx];
+    }
+
+    return NULL;
+}
+
+
+/* Builds a virPCIDevice for the controller that @nvme lives on,
+ * configured with the VFIO stub driver and @nvme's managed flag.
+ *
+ * Returns: the new PCI device, or NULL if it could not be created. */
+static virPCIDevicePtr
+virNVMeDeviceCreatePCIDevice(const virNVMeDevice *nvme)
+{
+    const virPCIDeviceAddress *addr = &nvme->address;
+    VIR_AUTOPTR(virPCIDevice) pci = virPCIDeviceNew(addr->domain,
+                                                    addr->bus,
+                                                    addr->slot,
+                                                    addr->function);
+
+    if (!pci)
+        return NULL;
+
+    virPCIDeviceSetManaged(pci, nvme->managed);
+    /* NVMe devices must be bound to vfio */
+    virPCIDeviceSetStubDriver(pci, VIR_PCI_STUB_DRIVER_VFIO);
+
+    VIR_RETURN_PTR(pci);
+}
+
+
+/**
+ * virNVMeDeviceListCreateDetachList:
+ * @activeList: list of active NVMe devices, may be NULL
+ * @toDetachList: list of NVMe devices to detach from the host
+ *
+ * Builds the list of PCI devices that have to be detached from
+ * the host so that all devices on @toDetachList become usable.
+ * The result is meant to be passed on to the PCI device detach
+ * functions (e.g. virHostdevPreparePCIDevicesImpl()); every PCI
+ * device on it is already set up for detach.
+ *
+ * A PCI address from @toDetachList ends up on the result exactly
+ * once, and only if no device on @activeList shares it.
+ *
+ * Returns: a list on success,
+ *          NULL otherwise.
+ */
+virPCIDeviceListPtr
+virNVMeDeviceListCreateDetachList(virNVMeDeviceListPtr activeList,
+                                  virNVMeDeviceListPtr toDetachList)
+{
+    VIR_AUTOUNREF(virPCIDeviceListPtr) pciDevices = NULL;
+    size_t i;
+
+    if (!(pciDevices = virPCIDeviceListNew()))
+        return NULL;
+
+    for (i = 0; i < toDetachList->count; i++) {
+        const virNVMeDevice *nvme = toDetachList->devs[i];
+        const virPCIDeviceAddress *addr = &nvme->address;
+        VIR_AUTOPTR(virPCIDevice) pci = NULL;
+
+        /* Skip the controller if it is already detached (another
+         * namespace of it is on the activeList), or if an earlier
+         * namespace from toDetachList has already put it on the
+         * result: two virNVMeDevice instances may share one PCI
+         * device, which must be listed only once. */
+        if (virNVMeDeviceListLookupByPCIAddress(activeList, addr) ||
+            virPCIDeviceListFindByIDs(pciDevices,
+                                      addr->domain,
+                                      addr->bus,
+                                      addr->slot,
+                                      addr->function))
+            continue;
+
+        if (!(pci = virNVMeDeviceCreatePCIDevice(nvme)) ||
+            virPCIDeviceListAdd(pciDevices, pci) < 0)
+            return NULL;
+
+        /* the list holds the device now; avoid freeing it */
+        pci = NULL;
+    }
+
+    VIR_RETURN_PTR(pciDevices);
+}
+
+
+/**
+ * virNVMeDeviceListCreateReAttachList:
+ * @activeList: list of active NVMe devices
+ * @toReAttachList: list of devices to reattach to the host
+ *
+ * This is a counterpart to virNVMeDeviceListCreateDetachList.
+ *
+ * This function creates a list of PCI devices which can then be
+ * reused by PCI device reattach functions (e.g.
+ * virHostdevReAttachPCIDevicesImpl()) as each PCI device from
+ * the returned list is initialized properly for reattach.
+ *
+ * Basically, this just collects unique PCI addresses
+ * of devices that appear on @toReAttachList and are used
+ * exactly once (i.e. no other namespaces are used from the same
+ * NVMe device). For that purpose, this function needs to know
+ * list of active NVMe devices (@activeList).
+ *
+ * Returns: a list on success,
+ *          NULL otherwise.
+ */
+virPCIDeviceListPtr
+virNVMeDeviceListCreateReAttachList(virNVMeDeviceListPtr activeList,
+                                    virNVMeDeviceListPtr toReAttachList)
+{
+    VIR_AUTOUNREF(virPCIDeviceListPtr) pciDevices = NULL;
+    size_t i;
+
+    if (!(pciDevices = virPCIDeviceListNew()))
+        return NULL;
+
+    for (i = 0; i < toReAttachList->count; i++) {
+        const virNVMeDevice *d = toReAttachList->devs[i];
+        VIR_AUTOPTR(virPCIDevice) pci = NULL;
+        size_t nused = 0;
+        size_t j;
+
+        /* Check if there is any other NVMe device with the same PCI address as
+         * @d. To simplify this, let's just count how many NVMe devices with
+         * the same PCI address there are on the @activeList.
+         *
+         * The inner loop needs its own counter; sharing one with the outer
+         * loop would corrupt the iteration over @toReAttachList. */
+        for (j = 0; j < activeList->count; j++) {
+            virNVMeDevicePtr other = activeList->devs[j];
+
+            if (!virPCIDeviceAddressEqual(&d->address, &other->address))
+                continue;
+
+            nused++;
+        }
+
+        /* Now, the following cases can happen:
+         * nused > 1  -> there are other NVMe devices active, do NOT reattach it
+         * nused == 1 -> we've found only @d on the @activeList, reattach it
+         * nused == 0 -> huh, wait, what? @d is NOT on the @activeList, how can
+         *               we reattach it?
+         */
+
+        if (nused == 0) {
+            /* Shouldn't happen (TM) */
+            VIR_AUTOFREE(char *) addrStr = virPCIDeviceAddressAsString(&d->address);
+            virReportError(VIR_ERR_INTERNAL_ERROR,
+                           _("NVMe device %s namespace %u not found"),
+                           NULLSTR(addrStr), d->namespace);
+            return NULL;
+        } else if (nused > 1) {
+            /* NVMe device is still in use */
+            continue;
+        }
+
+        /* nused == 1 -> reattach the device */
+        if (!(pci = virNVMeDeviceCreatePCIDevice(d)))
+            return NULL;
+
+        if (virPCIDeviceListAdd(pciDevices, pci) < 0)
+            return NULL;
+
+        /* avoid freeing the device */
+        pci = NULL;
+    }
+
+    VIR_RETURN_PTR(pciDevices);
+}
diff --git a/src/util/virnvme.h b/src/util/virnvme.h
new file mode 100644
index 0000000000..e582f2b572
--- /dev/null
+++ b/src/util/virnvme.h
@@ -0,0 +1,95 @@
+/*
+ * virnvme.h: helper APIs for managing NVMe devices
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see
+ * <http://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include "virpci.h"
+
+/* Opaque description of one NVMe namespace: PCI address of the
+ * controller plus namespace ID. */
+typedef struct _virNVMeDevice virNVMeDevice;
+typedef virNVMeDevice *virNVMeDevicePtr;
+
+/* Note that this list is lockable, and in fact, it is caller's
+ * responsibility to acquire the lock and release it. The reason
+ * is that in a lot of cases the list must be locked between two
+ * API calls and therefore only caller knows when it is safe to
+ * finally release the lock. */
+typedef struct _virNVMeDeviceList virNVMeDeviceList;
+typedef virNVMeDeviceList *virNVMeDeviceListPtr;
+
+/* Allocate a new device; returns NULL on allocation failure. */
+virNVMeDevicePtr
+virNVMeDeviceNew(const virPCIDeviceAddress *address,
+ unsigned long namespace,
+ bool managed);
+
+/* Free @dev including its "used by" strings; NULL is accepted. */
+void
+virNVMeDeviceFree(virNVMeDevicePtr dev);
+
+/* Registers virNVMeDeviceFree() as the cleanup function for
+ * VIR_AUTOPTR(virNVMeDevice) variables. */
+VIR_DEFINE_AUTOPTR_FUNC(virNVMeDevice, virNVMeDeviceFree);
+
+/* Deep copy of @dev; returns NULL on allocation failure. */
+virNVMeDevicePtr
+virNVMeDeviceCopy(const virNVMeDevice *dev);
+
+/* Returns a pointer to the PCI address stored inside @dev. */
+const virPCIDeviceAddress *
+virNVMeDeviceAddressGet(const virNVMeDevice *dev);
+
+/* Forget (and free) the recorded driver and domain names. */
+void
+virNVMeDeviceUsedByClear(virNVMeDevicePtr dev);
+
+/* Store the recorded driver and domain names into @drv and @dom;
+ * the strings are not copied. */
+void
+virNVMeDeviceUsedByGet(const virNVMeDevice *dev,
+ const char **drv,
+ const char **dom);
+
+/* Record copies of @drv and @dom in @dev; returns 0 on success,
+ * -1 on failure. */
+int
+virNVMeDeviceUsedBySet(virNVMeDevicePtr dev,
+ const char *drv,
+ const char *dom);
+
+/* Create an empty list; returns NULL on failure. */
+virNVMeDeviceListPtr
+virNVMeDeviceListNew(void);
+
+/* Number of devices on @list. */
+size_t
+virNVMeDeviceListCount(const virNVMeDeviceList *list);
+
+/* Append a copy of @dev to @list; returns -1 (with an error
+ * reported) if an equal device is already on the list, or -1 on
+ * allocation failure. */
+int
+virNVMeDeviceListAdd(virNVMeDeviceListPtr list,
+ const virNVMeDevice *dev);
+
+/* Remove and free the entry matching @dev; returns -1 (with an
+ * error reported) if there is none. */
+int
+virNVMeDeviceListDel(virNVMeDeviceListPtr list,
+ const virNVMeDevice *dev);
+
+/* Entry at index @i, or NULL if @i is out of range. */
+virNVMeDevicePtr
+virNVMeDeviceListGet(virNVMeDeviceListPtr list,
+ size_t i);
+
+/* Entry with the same PCI address and namespace ID as @dev, or
+ * NULL if not found (or if @list is NULL). */
+virNVMeDevicePtr
+virNVMeDeviceListLookup(virNVMeDeviceListPtr list,
+ const virNVMeDevice *dev);
+
+/* Index of the entry matching @dev, or -1 if not found (or if
+ * @list is NULL). */
+ssize_t
+virNVMeDeviceListLookupIndex(virNVMeDeviceListPtr list,
+ const virNVMeDevice *dev);
+
+/* PCI devices that must be detached from the host for the devices
+ * on @toDetachList; returns NULL on failure. */
+virPCIDeviceListPtr
+virNVMeDeviceListCreateDetachList(virNVMeDeviceListPtr activeList,
+ virNVMeDeviceListPtr toDetachList);
+
+/* PCI devices that can be reattached to the host for the devices
+ * on @toReAttachList; returns NULL on failure. */
+virPCIDeviceListPtr
+virNVMeDeviceListCreateReAttachList(virNVMeDeviceListPtr activeList,
+ virNVMeDeviceListPtr toReAttachList);
--
2.21.0