[libvirt] [PATCH 0/6 v8] Unprivleged SG_IO support

As a result of RFC [1], this implements the unprivleged SG_IO support. 1/6 and 2/6 are already acked. v7 - v8: * Change the XML tag name from "cdbfilter" to "sgio", and to leave enough room for future values, the values of "sgio" are "filtered" and "unfiltered" now. v6 - v7: * No restoring of unpriv_sgio per Daniel's thought. * Use "major:minor" as the hash key per Jirka's suggestion. Osier Yang (6): util: Prepare helpers for unpriv_sgio setting qemu: Add a hash table for the shared disks docs: Add docs and rng schema for new XML tag sgio conf: Parse and format the new XML qemu: qemu: set unpriv_sgio when starting domain and attaching disk qemu: Check if the shared disk's cdbfilter conflicts with others docs/formatdomain.html.in | 14 ++- docs/schemas/domaincommon.rng | 54 +++++--- src/conf/domain_conf.c | 55 ++++++-- src/conf/domain_conf.h | 10 ++ src/libvirt_private.syms | 4 + src/qemu/qemu_conf.c | 86 ++++++++++++ src/qemu/qemu_conf.h | 12 ++ src/qemu/qemu_driver.c | 30 ++++ src/qemu/qemu_process.c | 99 ++++++++++++++ src/qemu/qemu_process.h | 4 + src/util/util.c | 140 ++++++++++++++++++++ src/util/util.h | 13 ++ ...qemuxml2argv-disk-scsi-lun-passthrough-sgio.xml | 32 +++++ tests/qemuxml2xmltest.c | 1 + 14 files changed, 522 insertions(+), 32 deletions(-) create mode 100644 tests/qemuxml2argvdata/qemuxml2argv-disk-scsi-lun-passthrough-sgio.xml [1] https://www.redhat.com/archives/libvir-list/2012-November/msg00988.html [2] https://www.redhat.com/archives/libvir-list/2012-December/msg00325.html Regards, Osier

"virGetDeviceID" could be used across the sources, but it doesn't relate with this series, and could be done later. * src/util/util.h: (Declare virGetDeviceID, and vir{Get,Set}DeviceUnprivSGIO) * src/util/util.c: (Implement virGetDeviceID and vir{Get,Set}DeviceUnprivSGIO) * src/libvirt_private.syms: Export private symbols of upper helpers --- src/libvirt_private.syms | 3 + src/util/util.c | 140 ++++++++++++++++++++++++++++++++++++++++++++++ src/util/util.h | 11 ++++ 3 files changed, 154 insertions(+), 0 deletions(-) diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms index 4ec19a0..e500ea9 100644 --- a/src/libvirt_private.syms +++ b/src/libvirt_private.syms @@ -1283,6 +1283,8 @@ virFileWaitForDevices; virFileWriteStr; virFindFileInPath; virFormatIntDecimal; +virGetDeviceID; +virGetDeviceUnprivSGIO; virGetGroupID; virGetGroupName; virGetHostname; @@ -1301,6 +1303,7 @@ virPipeReadUntilEOF; virScaleInteger; virSetBlocking; virSetCloseExec; +virSetDeviceUnprivSGIO; virSetInherit; virSetNonBlock; virSetUIDGID; diff --git a/src/util/util.c b/src/util/util.c index 05e7ca7..06c7e33 100644 --- a/src/util/util.c +++ b/src/util/util.c @@ -3129,3 +3129,143 @@ virStrIsPrint(const char *str) return true; } + +#if defined(major) && defined(minor) +int +virGetDeviceID(const char *path, int *maj, int *min) +{ + struct stat sb; + char *canonical_path = NULL; + + if (virFileResolveLink(path, &canonical_path) < 0) + return -errno; + + if (stat(canonical_path, &sb) < 0) { + VIR_FREE(canonical_path); + return -errno; + } + + if (!S_ISBLK(sb.st_mode)) { + VIR_FREE(canonical_path); + return -EINVAL; + } + + if (maj) + *maj = major(sb.st_rdev); + if (min) + *min = minor(sb.st_rdev); + + VIR_FREE(canonical_path); + return 0; +} +#else +int +virGetDeviceID(const char *path ATRRIBUTE_UNUSED, + int *maj ATRRIBUTE_UNUSED, + int *min ATRRIBUTE_UNUSED) +{ + + return -ENOSYS; +} +#endif + +#define SYSFS_DEV_BLOCK_PATH "/sys/dev/block" + +static char * +virGetUnprivSGIOSysfsPath(const char *path, + const char *sysfs_dir) +{ + int maj, min; + char *sysfs_path = NULL; + int rc; + + if ((rc = virGetDeviceID(path, &maj, &min)) < 0) { + virReportSystemError(-rc, + _("Unable to get device ID '%s'"), + path); + return NULL; + } + + if (virAsprintf(&sysfs_path, "%s/%d:%d/queue/unpriv_sgio", + sysfs_dir ? sysfs_dir : SYSFS_DEV_BLOCK_PATH, + maj, min) < 0) { + virReportOOMError(); + return NULL; + } + + return sysfs_path; +} + +int +virSetDeviceUnprivSGIO(const char *path, + const char *sysfs_dir, + int unpriv_sgio) +{ + char *sysfs_path = NULL; + char *val = NULL; + int ret = -1; + int rc; + + if (!(sysfs_path = virGetUnprivSGIOSysfsPath(path, sysfs_dir))) + return -1; + + if (!virFileExists(sysfs_path)) { + virReportError(VIR_ERR_OPERATION_INVALID, "%s", + _("unpriv_sgio is not supported by this kernel")); + goto cleanup; + } + + if (virAsprintf(&val, "%d", unpriv_sgio) < 0) { + virReportOOMError(); + goto cleanup; + } + + if ((rc = virFileWriteStr(sysfs_path, val, 0)) < 0) { + virReportSystemError(-rc, _("failed to set %s"), sysfs_path); + goto cleanup; + } + + ret = 0; +cleanup: + VIR_FREE(sysfs_path); + VIR_FREE(val); + return ret; +} + +int +virGetDeviceUnprivSGIO(const char *path, + const char *sysfs_dir, + int *unpriv_sgio) +{ + char *sysfs_path = NULL; + char *buf = NULL; + char *tmp = NULL; + int ret = -1; + + if (!(sysfs_path = virGetUnprivSGIOSysfsPath(path, sysfs_dir))) + return -1; + + if (!virFileExists(sysfs_path)) { + virReportError(VIR_ERR_OPERATION_INVALID, "%s", + _("unpriv_sgio is not supported by this kernel")); + goto cleanup; + } + + if (virFileReadAll(sysfs_path, 1024, &buf) < 0) + goto cleanup; + + if ((tmp = strchr(buf, '\n'))) + *tmp = '\0'; + + if (virStrToLong_i(buf, NULL, 10, unpriv_sgio) < 0) { + virReportError(VIR_ERR_INTERNAL_ERROR, + _("failed to parse value of %s"), sysfs_path); + goto cleanup; + } + + ret = 0; +cleanup: + VIR_FREE(sysfs_path); + VIR_FREE(buf); + return ret; +} diff --git a/src/util/util.h b/src/util/util.h index 6d5dd03..71003f7 100644 --- a/src/util/util.h +++ b/src/util/util.h @@ -281,4 +281,15 @@ bool virIsDevMapperDevice(const char *dev_name) ATTRIBUTE_NONNULL(1); bool virValidateWWN(const char *wwn); bool virStrIsPrint(const char *str); + +int virGetDeviceID(const char *path, + int *maj, + int *min); +int virSetDeviceUnprivSGIO(const char *path, + const char *sysfs_dir, + int unpriv_sgio); +int virGetDeviceUnprivSGIO(const char *path, + const char *sysfs_dir, + int *unpriv_sgio); + #endif /* __VIR_UTIL_H__ */ -- 1.7.7.6

This introduces a hash table for qemu driver, to store the shared disk's info as (@major:minor, @ref_count). @ref_count is the number of domains which shares the disk. Since we only care about if the disk support unprivileged SG_IO commands, and the SG_IO commands only make sense for block disk, this patch only manages (add/remove hash entry) the shared disk for block disk. * src/qemu/qemu_conf.h: (Add member 'sharedDisks' of type virHashTablePtr; Declare helpers qemuGetSharedDiskKey, qemuAddSharedDisk and qemuRemoveSharedDisk) * src/qemu/qemu_conf.c (Implement the 3 helpers) * src/qemu/qemu_process.c (Update 'sharedDisks' when domain starting and shutdown) * src/qemu/qemu_driver.c (Update 'sharedDisks' when attaching or detaching disk). --- src/qemu/qemu_conf.c | 86 +++++++++++++++++++++++++++++++++++++++++++++++ src/qemu/qemu_conf.h | 12 ++++++ src/qemu/qemu_driver.c | 22 ++++++++++++ src/qemu/qemu_process.c | 15 ++++++++ 4 files changed, 135 insertions(+), 0 deletions(-) diff --git a/src/qemu/qemu_conf.c b/src/qemu/qemu_conf.c index a1b1d04..e25376d 100644 --- a/src/qemu/qemu_conf.c +++ b/src/qemu/qemu_conf.c @@ -553,3 +553,89 @@ qemuDriverCloseCallbackRunAll(virQEMUDriverPtr driver, virHashForEach(driver->closeCallbacks, qemuDriverCloseCallbackRun, &data); } + +/* Construct the hash key for sharedDisks as "major:minor" */ +char * +qemuGetSharedDiskKey(const char *disk_path) +{ + int major, minor; + char *key = NULL; + int rc; + + if ((rc = virGetDeviceID(disk_path, &major, &minor)) < 0) { + virReportSystemError(-rc, + _("Unable to get minor number of device '%s'"), + disk_path); + return NULL; + } + + if (virAsprintf(&key, "%d:%d", major, minor) < 0) { + virReportOOMError(); + return NULL; + } + + return key; +} + +/* Increase ref count if the entry already exists, otherwise + * add a new entry. + */ +int +qemuAddSharedDisk(virHashTablePtr sharedDisks, + const char *disk_path) +{ + size_t *ref = NULL; + char *key = NULL; + + if (!(key = qemuGetSharedDiskKey(disk_path))) + return -1; + + if ((ref = virHashLookup(sharedDisks, key))) { + if (virHashUpdateEntry(sharedDisks, key, ++ref) < 0) { + VIR_FREE(key); + return -1; + } + } else { + if (virHashAddEntry(sharedDisks, key, (void *)0x1)) { + VIR_FREE(key); + return -1; + } + } + + VIR_FREE(key); + return 0; +} + +/* Decrease the ref count if the entry already exists, otherwise + * remove the entry. + */ +int +qemuRemoveSharedDisk(virHashTablePtr sharedDisks, + const char *disk_path) +{ + size_t *ref = NULL; + char *key = NULL; + + if (!(key = qemuGetSharedDiskKey(disk_path))) + return -1; + + if (!(ref = virHashLookup(sharedDisks, key))) { + VIR_FREE(key); + return -1; + } + + if (ref != (void *)0x1) { + if (virHashUpdateEntry(sharedDisks, key, --ref) < 0) { + VIR_FREE(key); + return -1; + } + } else { + if (virHashRemoveEntry(sharedDisks, key) < 0) { + VIR_FREE(key); + return -1; + } + } + + VIR_FREE(key); + return 0; +} diff --git a/src/qemu/qemu_conf.h b/src/qemu/qemu_conf.h index 1a39946..225ba55 100644 --- a/src/qemu/qemu_conf.h +++ b/src/qemu/qemu_conf.h @@ -148,6 +148,8 @@ struct _virQEMUDriver { /* The devices which is are not in use by the host or any guest. */ pciDeviceList *inactivePciHostdevs; + virHashTablePtr sharedDisks; + virBitmapPtr reservedRemotePorts; virSysinfoDefPtr hostsysinfo; @@ -212,4 +214,14 @@ qemuDriverCloseCallback qemuDriverCloseCallbackGet(virQEMUDriverPtr driver, void qemuDriverCloseCallbackRunAll(virQEMUDriverPtr driver, virConnectPtr conn); +int qemuAddSharedDisk(virHashTablePtr sharedDisks, + const char *disk_path) + ATTRIBUTE_NONNULL(1) ATTRIBUTE_NONNULL(2); + +int qemuRemoveSharedDisk(virHashTablePtr sharedDisks, + const char *disk_path) + ATTRIBUTE_NONNULL(1) ATTRIBUTE_NONNULL(2); +char * qemuGetSharedDiskKey(const char *disk_path) + ATTRIBUTE_NONNULL(1); + #endif /* __QEMUD_CONF_H */ diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c index 2dd6922..a313f5e 100644 --- a/src/qemu/qemu_driver.c +++ b/src/qemu/qemu_driver.c @@ -844,6 +844,9 @@ qemuStartup(bool privileged, if ((qemu_driver->inactivePciHostdevs = pciDeviceListNew()) == NULL) goto error; + if (!(qemu_driver->sharedDisks = virHashCreate(30, NULL))) + goto error; + if (privileged) { if (chown(qemu_driver->libDir, qemu_driver->user, qemu_driver->group) < 0) { virReportSystemError(errno, @@ -1097,6 +1100,7 @@ qemuShutdown(void) { pciDeviceListFree(qemu_driver->activePciHostdevs); pciDeviceListFree(qemu_driver->inactivePciHostdevs); usbDeviceListFree(qemu_driver->activeUsbHostdevs); + virHashFree(qemu_driver->sharedDisks); virCapabilitiesFree(qemu_driver->caps); qemuCapsCacheFree(qemu_driver->capsCache); @@ -6041,6 +6045,15 @@ qemuDomainAttachDeviceDiskLive(virConnectPtr conn, VIR_WARN("Failed to teardown cgroup for disk path %s", NULLSTR(disk->src)); } + + if (ret == 0 && + disk->type == VIR_DOMAIN_DISK_TYPE_BLOCK && + disk->shared) { + if (qemuAddSharedDisk(driver->sharedDisks, disk->src) < 0) + VIR_WARN("Failed to add disk '%s' to shared disk table", + disk->src); + } + end: if (cgroup) virCgroupFree(&cgroup); @@ -6155,6 +6168,15 @@ qemuDomainDetachDeviceDiskLive(virQEMUDriverPtr driver, virDomainDiskDeviceTypeToString(disk->type)); break; } + + if (ret == 0 && + disk->type == VIR_DOMAIN_DISK_TYPE_BLOCK && + disk->shared) { + if (qemuRemoveSharedDisk(driver->sharedDisks, disk->src) < 0) + VIR_WARN("Failed to remove disk '%s' from shared disk table", + disk->src); + } + return ret; } diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c index cc0e947..a84df07 100644 --- a/src/qemu/qemu_process.c +++ b/src/qemu/qemu_process.c @@ -3720,6 +3720,8 @@ int qemuProcessStart(virConnectPtr conn, /* in case a certain disk is desirous of CAP_SYS_RAWIO, add this */ for (i = 0; i < vm->def->ndisks; i++) { + virDomainDiskDefPtr disk = vm->def->disks[i]; + if (vm->def->disks[i]->rawio == 1) #ifdef CAP_SYS_RAWIO virCommandAllowCap(cmd, CAP_SYS_RAWIO); @@ -3727,6 +3729,11 @@ int qemuProcessStart(virConnectPtr conn, virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", _("Raw I/O is not supported on this platform")); #endif + + if (disk->type == VIR_DOMAIN_DISK_TYPE_BLOCK && disk->shared) { + if (qemuAddSharedDisk(driver->sharedDisks, disk->src) < 0) + goto cleanup; + } } virCommandSetPreExecHook(cmd, qemuProcessHook, &hookData); @@ -4123,6 +4130,14 @@ void qemuProcessStop(virQEMUDriverPtr driver, flags & VIR_QEMU_PROCESS_STOP_MIGRATED); virSecurityManagerReleaseLabel(driver->securityManager, vm->def); + for (i = 0; i < vm->def->ndisks; i++) { + virDomainDiskDefPtr disk = vm->def->disks[i]; + + if (disk->type == VIR_DOMAIN_DISK_TYPE_BLOCK && disk->shared) { + ignore_value(qemuRemoveSharedDisk(driver->sharedDisks, disk->src)); + } + } + /* Clear out dynamically assigned labels */ for (i = 0; i < vm->def->nseclabels; i++) { if (vm->def->seclabels[i]->type == VIR_DOMAIN_SECLABEL_DYNAMIC) { -- 1.7.7.6

This introduces new XML tag "sgio" for disk, its valid values are "filtered" and "unfiltered", setting it as "filtered" will set the disk's unpriv_sgio to 0, and "unfiltered" to set it as 1, which allows the unprivileged SG_IO commands. --- docs/formatdomain.html.in | 14 ++++++++++- docs/schemas/domaincommon.rng | 54 +++++++++++++++++++++++++++------------- 2 files changed, 49 insertions(+), 19 deletions(-) diff --git a/docs/formatdomain.html.in b/docs/formatdomain.html.in index 8d9ab9e..ac55845 100644 --- a/docs/formatdomain.html.in +++ b/docs/formatdomain.html.in @@ -1395,7 +1395,19 @@ rawio='yes', rawio capability will be enabled for all disks in the domain (because, in the case of QEMU, this capability can only be set on a per-process basis). This attribute is only - valid when device is "lun". + valid when device is "lun". NB, <code>rawio</code> intends to + confine the capability per-device, however, current QEMU + implementation gives the domain process broader capability + than that (per-process basis, affects all the domain disks). + To confine the capability as much as possible for QEMU driver + as this stage, <code>sgio</code> is recommended, it's more + secure than <code>rawio</code>. + The optional <code>sgio</code> attribute indicates whether the + kernel will filter unprivileged SG_IO commands for the disk, + valid settings are "filtered" or "unfiltered". Defaults to + "filtered". Same with <code>rawio</code>, <code>sgio</code> + is only valid for device 'lun'. + <span class="since">since 1.0.2</span> The optional <code>snapshot</code> attribute indicates the default behavior of the disk during disk snapshots: "internal" requires a file format such as qcow2 that can store both the diff --git a/docs/schemas/domaincommon.rng b/docs/schemas/domaincommon.rng index 0529d62..7a97781 100644 --- a/docs/schemas/domaincommon.rng +++ b/docs/schemas/domaincommon.rng @@ -971,24 +971,42 @@ --> <define name="disk"> <element name="disk"> - <optional> - <attribute name="device"> - <choice> - <value>floppy</value> - <value>disk</value> - <value>cdrom</value> - <value>lun</value> - </choice> - </attribute> - </optional> - <optional> - <attribute name="rawio"> - <choice> - <value>yes</value> - <value>no</value> - </choice> - </attribute> - </optional> + <choice> + <group> + <optional> + <attribute name="device"> + <choice> + <value>floppy</value> + <value>disk</value> + <value>cdrom</value> + </choice> + </attribute> + </optional> + </group> + <group> + <attribute name="device"> + <choice> + <value>lun</value> + </choice> + </attribute> + <optional> + <attribute name="rawio"> + <choice> + <value>yes</value> + <value>no</value> + </choice> + </attribute> + </optional> + <optional> + <attribute name="sgio"> + <choice> + <value>filtered</value> + <value>unfiltered</value> + </choice> + </attribute> + </optional> + </group> + </choice> <optional> <ref name="snapshot"/> </optional> -- 1.7.7.6

Like "rawio", "sgio" is only allowed for block disk of device type "lun". It doesn't default disk->sgio to "filtered" when parsing, as it won't be able to distinguish explicitly requested "filtered" and a default "filtered" in driver then. We have to error out for explicit request when the kernel doesn't support the new sysfs knob "unpriv_sgio", however, for defaulted "filtered", we can just ignore it if the kernel doesn't support "unpriv_sgio". --- src/conf/domain_conf.c | 55 +++++++++++++++----- src/conf/domain_conf.h | 10 ++++ ...qemuxml2argv-disk-scsi-lun-passthrough-sgio.xml | 32 +++++++++++ tests/qemuxml2xmltest.c | 1 + 4 files changed, 85 insertions(+), 13 deletions(-) create mode 100644 tests/qemuxml2argvdata/qemuxml2argv-disk-scsi-lun-passthrough-sgio.xml diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c index 6a7646e..40a9019 100644 --- a/src/conf/domain_conf.c +++ b/src/conf/domain_conf.c @@ -242,6 +242,12 @@ VIR_ENUM_IMPL(virDomainDiskIo, VIR_DOMAIN_DISK_IO_LAST, "default", "native", "threads") + +VIR_ENUM_IMPL(virDomainDiskSGIO, VIR_DOMAIN_DISK_SGIO_LAST, + "default", + "filtered", + "unfiltered") + VIR_ENUM_IMPL(virDomainIoEventFd, VIR_DOMAIN_IO_EVENT_FD_LAST, "default", "on", @@ -3595,6 +3601,7 @@ virDomainDiskDefParseXML(virCapsPtr caps, char *device = NULL; char *snapshot = NULL; char *rawio = NULL; + char *sgio = NULL; char *driverName = NULL; char *driverType = NULL; char *source = NULL; @@ -3659,6 +3666,7 @@ virDomainDiskDefParseXML(virCapsPtr caps, snapshot = virXMLPropString(node, "snapshot"); rawio = virXMLPropString(node, "rawio"); + sgio = virXMLPropString(node, "sgio"); cur = node->children; while (cur != NULL) { @@ -4109,22 +4117,32 @@ virDomainDiskDefParseXML(virCapsPtr caps, def->snapshot = VIR_DOMAIN_SNAPSHOT_LOCATION_NONE; } + if ((rawio || sgio) && + (def->device != VIR_DOMAIN_DISK_DEVICE_LUN)) { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", + _("rawio or sgio can be used only with " + "device='lun'")); + goto error; + } + if (rawio) { def->rawio_specified = true; - if (def->device == VIR_DOMAIN_DISK_DEVICE_LUN) { - if (STREQ(rawio, "yes")) { - def->rawio = 1; - } else if (STREQ(rawio, "no")) { - def->rawio = 0; - } else { - virReportError(VIR_ERR_XML_ERROR, - _("unknown disk rawio setting '%s'"), - rawio); - goto error; - } + if (STREQ(rawio, "yes")) { + def->rawio = 1; + } else if (STREQ(rawio, "no")) { + def->rawio = 0; } else { - virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", - _("rawio can be used only with device='lun'")); + virReportError(VIR_ERR_XML_ERROR, + _("unknown disk rawio setting '%s'"), + rawio); + goto error; + } + } + + if (sgio) { + if ((def->sgio = virDomainDiskSGIOTypeFromString(sgio)) <= 0) { + virReportError(VIR_ERR_XML_ERROR, + _("unknown disk sgio mode '%s'"), sgio); goto error; } } @@ -4367,6 +4385,7 @@ cleanup: VIR_FREE(type); VIR_FREE(snapshot); VIR_FREE(rawio); + VIR_FREE(sgio); VIR_FREE(target); VIR_FREE(source); VIR_FREE(tray); @@ -12127,6 +12146,7 @@ virDomainDiskDefFormat(virBufferPtr buf, const char *event_idx = virDomainVirtioEventIdxTypeToString(def->event_idx); const char *copy_on_read = virDomainVirtioEventIdxTypeToString(def->copy_on_read); const char *startupPolicy = virDomainStartupPolicyTypeToString(def->startupPolicy); + const char *sgio = virDomainDiskSGIOTypeToString(def->sgio); int n; char uuidstr[VIR_UUID_STRING_BUFLEN]; @@ -12156,6 +12176,11 @@ virDomainDiskDefFormat(virBufferPtr buf, _("unexpected disk io mode %d"), def->iomode); return -1; } + if (!sgio) { + virReportError(VIR_ERR_INTERNAL_ERROR, + _("Unexpected disk sgio mode '%d'"), def->sgio); + return -1; + } virBufferAsprintf(buf, " <disk type='%s' device='%s'", @@ -12167,6 +12192,10 @@ virDomainDiskDefFormat(virBufferPtr buf, virBufferAddLit(buf, " rawio='no'"); } } + + if (def->sgio) + virBufferAsprintf(buf, " sgio='%s'", sgio); + if (def->snapshot && !(def->snapshot == VIR_DOMAIN_SNAPSHOT_LOCATION_NONE && def->readonly)) virBufferAsprintf(buf, " snapshot='%s'", diff --git a/src/conf/domain_conf.h b/src/conf/domain_conf.h index 5062e07..70f8c48 100644 --- a/src/conf/domain_conf.h +++ b/src/conf/domain_conf.h @@ -566,6 +566,14 @@ enum virDomainDiskSecretType { VIR_DOMAIN_DISK_SECRET_TYPE_LAST }; +enum virDomainDiskSGIO { + VIR_DOMAIN_DISK_SGIO_DEFAULT = 0, + VIR_DOMAIN_DISK_SGIO_FILTERED, + VIR_DOMAIN_DISK_SGIO_UNFILTERED, + + VIR_DOMAIN_DISK_SGIO_LAST +}; + typedef struct _virDomainBlockIoTuneInfo virDomainBlockIoTuneInfo; struct _virDomainBlockIoTuneInfo { unsigned long long total_bytes_sec; @@ -638,6 +646,7 @@ struct _virDomainDiskDef { virStorageEncryptionPtr encryption; bool rawio_specified; int rawio; /* no = 0, yes = 1 */ + int sgio; size_t nseclabels; virSecurityDeviceLabelDefPtr *seclabels; @@ -2232,6 +2241,7 @@ VIR_ENUM_DECL(virDomainDiskProtocol) VIR_ENUM_DECL(virDomainDiskProtocolTransport) VIR_ENUM_DECL(virDomainDiskIo) VIR_ENUM_DECL(virDomainDiskSecretType) +VIR_ENUM_DECL(virDomainDiskSGIO) VIR_ENUM_DECL(virDomainDiskTray) VIR_ENUM_DECL(virDomainIoEventFd) VIR_ENUM_DECL(virDomainVirtioEventIdx) diff --git a/tests/qemuxml2argvdata/qemuxml2argv-disk-scsi-lun-passthrough-sgio.xml b/tests/qemuxml2argvdata/qemuxml2argv-disk-scsi-lun-passthrough-sgio.xml new file mode 100644 index 0000000..eecf609 --- /dev/null +++ b/tests/qemuxml2argvdata/qemuxml2argv-disk-scsi-lun-passthrough-sgio.xml @@ -0,0 +1,32 @@ +<domain type='qemu'> + <name>QEMUGuest1</name> + <uuid>c7a5fdbd-edaf-9455-926a-d65c16db1809</uuid> + <memory unit='KiB'>219136</memory> + <currentMemory unit='KiB'>219136</currentMemory> + <vcpu placement='static'>1</vcpu> + <os> + <type arch='i686' machine='pc'>hvm</type> + <boot dev='hd'/> + </os> + <clock offset='utc'/> + <on_poweroff>destroy</on_poweroff> + <on_reboot>restart</on_reboot> + <on_crash>destroy</on_crash> + <devices> + <emulator>/usr/bin/qemu</emulator> + <disk type='block' device='lun' rawio='no' sgio='unfiltered'> + <source dev='/dev/HostVG/QEMUGuest1'/> + <target dev='hda' bus='scsi'/> + <address type='drive' controller='0' bus='0' target='0' unit='0'/> + </disk> + <disk type='block' device='lun' sgio='filtered'> + <source dev='/dev/HostVG/QEMUGuest2'/> + <target dev='hda' bus='scsi'/> + <address type='drive' controller='0' bus='0' target='1' unit='1'/> + </disk> + <controller type='scsi' index='0' model='virtio-scsi'/> + <controller type='scsi' index='1' model='lsilogic'/> + <controller type='usb' index='0'/> + <memballoon model='virtio'/> + </devices> +</domain> diff --git a/tests/qemuxml2xmltest.c b/tests/qemuxml2xmltest.c index c2d3dd3..160e958 100644 --- a/tests/qemuxml2xmltest.c +++ b/tests/qemuxml2xmltest.c @@ -238,6 +238,7 @@ mymain(void) DO_TEST("seclabel-static"); DO_TEST("seclabel-none"); DO_TEST("numad-static-vcpu-no-numatune"); + DO_TEST("disk-scsi-lun-passthrough-sgio"); DO_TEST("disk-scsi-disk-vpd"); -- 1.7.7.6

This ignores the default "filtered" if unpriv_sgio is not supported by kernel, but for explicit request "filtered", it error out for domain starting. --- src/libvirt_private.syms | 1 + src/qemu/qemu_driver.c | 15 +++++++++------ src/qemu/qemu_process.c | 31 +++++++++++++++++++++++++++++++ src/qemu/qemu_process.h | 1 + src/util/util.c | 2 +- src/util/util.h | 2 ++ 6 files changed, 45 insertions(+), 7 deletions(-) diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms index e500ea9..3276f72 100644 --- a/src/libvirt_private.syms +++ b/src/libvirt_private.syms @@ -1288,6 +1288,7 @@ virGetDeviceUnprivSGIO; virGetGroupID; virGetGroupName; virGetHostname; +virGetUnprivSGIOSysfsPath; virGetUserCacheDirectory; virGetUserConfigDirectory; virGetUserDirectory; diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c index a313f5e..98b6b59 100644 --- a/src/qemu/qemu_driver.c +++ b/src/qemu/qemu_driver.c @@ -6046,12 +6046,15 @@ qemuDomainAttachDeviceDiskLive(virConnectPtr conn, NULLSTR(disk->src)); } - if (ret == 0 && - disk->type == VIR_DOMAIN_DISK_TYPE_BLOCK && - disk->shared) { - if (qemuAddSharedDisk(driver->sharedDisks, disk->src) < 0) - VIR_WARN("Failed to add disk '%s' to shared disk table", - disk->src); + if (ret == 0) { + if (disk->type == VIR_DOMAIN_DISK_TYPE_BLOCK && disk->shared) { + if (qemuAddSharedDisk(driver->sharedDisks, disk->src) < 0) + VIR_WARN("Failed to add disk '%s' to shared disk table", + disk->src); + } + + if (qemuSetUnprivSGIO(disk) < 0) + VIR_WARN("Failed to set unpriv_sgio of disk '%s'", disk->src); } end: diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c index a84df07..66637d1 100644 --- a/src/qemu/qemu_process.c +++ b/src/qemu/qemu_process.c @@ -3385,6 +3385,34 @@ qemuProcessReconnectAll(virConnectPtr conn, virQEMUDriverPtr driver) virHashForEach(driver->domains.objs, qemuProcessReconnectHelper, &data); } +int +qemuSetUnprivSGIO(virDomainDiskDefPtr disk) +{ + int val = -1; + + if (disk->sgio) + val = (disk->sgio == VIR_DOMAIN_DISK_SGIO_UNFILTERED); + + /* Ignore the setting if unpriv_sgio is not supported by the + * kernel, otherwise defaults to filter the SG_IO commands, + * I.E. Set unpriv_sgio to 0. + */ + if (disk->sgio == VIR_DOMAIN_DISK_SGIO_DEFAULT && + disk->device == VIR_DOMAIN_DISK_DEVICE_LUN) { + char *sysfs_path = NULL; + + if ((sysfs_path = virGetUnprivSGIOSysfsPath(disk->src, NULL)) && + virFileExists(sysfs_path)) + val = 0; + VIR_FREE(sysfs_path); + } + + if (val >= 0 && virSetDeviceUnprivSGIO(disk->src, NULL, val) < 0) + return -1; + + return 0; +} + int qemuProcessStart(virConnectPtr conn, virQEMUDriverPtr driver, virDomainObjPtr vm, @@ -3734,6 +3762,9 @@ int qemuProcessStart(virConnectPtr conn, if (qemuAddSharedDisk(driver->sharedDisks, disk->src) < 0) goto cleanup; } + + if (qemuSetUnprivSGIO(disk) < 0) + goto cleanup; } virCommandSetPreExecHook(cmd, qemuProcessHook, &hookData); diff --git a/src/qemu/qemu_process.h b/src/qemu/qemu_process.h index c12df32..52a298d 100644 --- a/src/qemu/qemu_process.h +++ b/src/qemu/qemu_process.h @@ -98,5 +98,6 @@ bool qemuProcessAutoDestroyActive(virQEMUDriverPtr driver, virDomainObjPtr vm); virBitmapPtr qemuPrepareCpumap(virQEMUDriverPtr driver, virBitmapPtr nodemask); +int qemuSetUnprivSGIO(virDomainDiskDefPtr disk); #endif /* __QEMU_PROCESS_H__ */ diff --git a/src/util/util.c b/src/util/util.c index 06c7e33..bc6d17f 100644 --- a/src/util/util.c +++ b/src/util/util.c @@ -3171,7 +3171,7 @@ virGetDeviceID(const char *path ATRRIBUTE_UNUSED, #define SYSFS_DEV_BLOCK_PATH "/sys/dev/block" -static char * +char * virGetUnprivSGIOSysfsPath(const char *path, const char *sysfs_dir) { diff --git a/src/util/util.h b/src/util/util.h index 71003f7..7f873d6 100644 --- a/src/util/util.h +++ b/src/util/util.h @@ -291,5 +291,7 @@ int virSetDeviceUnprivSGIO(const char *path, int virGetDeviceUnprivSGIO(const char *path, const char *sysfs_dir, int *unpriv_sgio); +char * virGetUnprivSGIOSysfsPath(const char *path, + const char *sysfs_dir); #endif /* __VIR_UTIL_H__ */ -- 1.7.7.6

This prevents domain starting and disk attaching if the shared disk's setting conflicts with other active domain(s), E.g. A domain with "sgio" set as "filtered", however, another active domain is using it set as "unfiltered". --- src/qemu/qemu_driver.c | 5 ++++ src/qemu/qemu_process.c | 53 +++++++++++++++++++++++++++++++++++++++++++++++ src/qemu/qemu_process.h | 3 ++ 3 files changed, 61 insertions(+), 0 deletions(-) diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c index 98b6b59..2f2b15a 100644 --- a/src/qemu/qemu_driver.c +++ b/src/qemu/qemu_driver.c @@ -5995,6 +5995,11 @@ qemuDomainAttachDeviceDiskLive(virConnectPtr conn, goto end; } + if (disk->type == VIR_DOMAIN_DISK_TYPE_BLOCK && + disk->shared && + (qemuCheckSharedDisk(driver->sharedDisks, disk) < 0)) + goto end; + if (qemuDomainDetermineDiskChain(driver, disk, false) < 0) goto end; diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c index 66637d1..2e4d139 100644 --- a/src/qemu/qemu_process.c +++ b/src/qemu/qemu_process.c @@ -3413,6 +3413,56 @@ qemuSetUnprivSGIO(virDomainDiskDefPtr disk) return 0; } +/* Check if a shared disk's setting conflicts with the conf + * used by other domain(s). Currently only checks the sgio + * setting. Note that this should only be called for disk with + * block source. + * + * Returns 0 if no conflicts, otherwise returns -1. + */ +int +qemuCheckSharedDisk(virHashTablePtr sharedDisks, + virDomainDiskDefPtr disk) +{ + int val; + size_t *ref = NULL; + char *key = NULL; + int ret = 0; + + if (!(key = qemuGetSharedDiskKey(disk->src))) + return -1; + + /* It can't be conflict if no other domain is + * is sharing it. + */ + if (!(ref = virHashLookup(sharedDisks, key))) + goto cleanup; + + if (ref == (void *)0x1) + goto cleanup; + + if (virGetDeviceUnprivSGIO(disk->src, NULL, &val) < 0) { + ret = -1; + goto cleanup; + } + + if ((val == 0 && + (disk->sgio == VIR_DOMAIN_DISK_SGIO_FILTERED || + disk->sgio == VIR_DOMAIN_DISK_SGIO_DEFAULT)) || + (val == 1 && + disk->sgio == VIR_DOMAIN_DISK_SGIO_UNFILTERED)) + goto cleanup; + + virReportError(VIR_ERR_OPERATION_INVALID, + _("sgio of shared disk '%s' conflicts with other " + "active domains"), disk->src); + ret = -1; + +cleanup: + VIR_FREE(key); + return ret; +} + int qemuProcessStart(virConnectPtr conn, virQEMUDriverPtr driver, virDomainObjPtr vm, @@ -3761,6 +3811,9 @@ int qemuProcessStart(virConnectPtr conn, if (disk->type == VIR_DOMAIN_DISK_TYPE_BLOCK && disk->shared) { if (qemuAddSharedDisk(driver->sharedDisks, disk->src) < 0) goto cleanup; + + if (qemuCheckSharedDisk(driver->sharedDisks, disk) < 0) + goto cleanup; } if (qemuSetUnprivSGIO(disk) < 0) diff --git a/src/qemu/qemu_process.h b/src/qemu/qemu_process.h index 52a298d..313fa39 100644 --- a/src/qemu/qemu_process.h +++ b/src/qemu/qemu_process.h @@ -100,4 +100,7 @@ virBitmapPtr qemuPrepareCpumap(virQEMUDriverPtr driver, virBitmapPtr nodemask); int qemuSetUnprivSGIO(virDomainDiskDefPtr disk); +int qemuCheckSharedDisk(virHashTablePtr sharedDisks, + virDomainDiskDefPtr disk); + #endif /* __QEMU_PROCESS_H__ */ -- 1.7.7.6
participants (1)
-
Osier Yang