[libvirt] [PATCH v2 0/4] add disk driver metadata_cache_size option

1st docs patch may need tweaks if merged now (if -blockdev will not be available in 4.10)

diff from v1 [1]
===============
- support only -blockdev configurations
- add 'default' value for the attribute
- set QEMU_CAPS_QCOW2_L2_CACHE_SIZE only for recent qemu versions (see 2nd patch)
- factor out API changes to distinct patch
- fix xml2argv test to blockdev configuration
- factor out xml2argv test to distinct patch as it is not yet passing
- other misc changes

[1] https://www.redhat.com/archives/libvir-list/2018-November/msg00016.html

Nikolay Shirokovskiy (4):
  xml: add disk driver metadata_cache_size option
  qemu: caps: add QEMU_CAPS_QCOW2_L2_CACHE_SIZE
  qemu: support metadata-cache-size for blockdev
  DO NOT APPLY: add xml2argv test for metadata_cache_size

 docs/formatdomain.html.in                          |  8 ++++
 docs/schemas/domaincommon.rng                      | 11 +++++
 src/conf/domain_conf.c                             | 17 ++++++++
 src/conf/domain_conf.h                             |  9 ++++
 src/qemu/qemu_block.c                              |  5 ++-
 src/qemu/qemu_capabilities.c                       |  5 +++
 src/qemu/qemu_capabilities.h                       |  1 +
 src/qemu/qemu_command.c                            | 23 +++++++++++
 src/qemu/qemu_domain.c                             |  2 +
 src/util/virstoragefile.c                          |  1 +
 src/util/virstoragefile.h                          |  1 +
 .../qemuxml2argvdata/disk-metadata_cache_size.args | 39 ++++++++++++++++++
 .../qemuxml2argvdata/disk-metadata_cache_size.xml  | 42 +++++++++++++++++++
 tests/qemuxml2argvtest.c                           |  2 +
 .../disk-metadata_cache_size.xml                   | 48 ++++++++++++++++++++++
 tests/qemuxml2xmltest.c                            |  2 +
 16 files changed, 215 insertions(+), 1 deletion(-)
 create mode 100644 tests/qemuxml2argvdata/disk-metadata_cache_size.args
 create mode 100644 tests/qemuxml2argvdata/disk-metadata_cache_size.xml
 create mode 100644 tests/qemuxml2xmloutdata/disk-metadata_cache_size.xml

-- 
1.8.3.1

Signed-off-by: Nikolay Shirokovskiy <nshirokovskiy@virtuozzo.com> --- docs/formatdomain.html.in | 8 ++++ docs/schemas/domaincommon.rng | 11 +++++ src/conf/domain_conf.c | 17 ++++++++ src/conf/domain_conf.h | 9 ++++ .../qemuxml2argvdata/disk-metadata_cache_size.xml | 42 +++++++++++++++++++ .../disk-metadata_cache_size.xml | 48 ++++++++++++++++++++++ tests/qemuxml2xmltest.c | 2 + 7 files changed, 137 insertions(+) create mode 100644 tests/qemuxml2argvdata/disk-metadata_cache_size.xml create mode 100644 tests/qemuxml2xmloutdata/disk-metadata_cache_size.xml diff --git a/docs/formatdomain.html.in b/docs/formatdomain.html.in index 269741a..1d186ab 100644 --- a/docs/formatdomain.html.in +++ b/docs/formatdomain.html.in @@ -3556,6 +3556,14 @@ virt queues for virtio-blk. (<span class="since">Since 3.9.0</span>) </li> <li> + The optional <code>metadata_cache_size</code> attribute specifies + metadata cache size policy, possible values are "default" and "maximum". + "default" leaves setting cache size to hypervisor, "maximum" makes + cache size large enough to keep all metadata, this will help if workload + needs access to whole disk all the time. (<span class="since">Since + 4.10.0, QEMU 3.1</span>) + </li> + <li> For virtio disks, <a href="#elementsVirtio">Virtio-specific options</a> can also be set. (<span class="since">Since 3.5.0</span>) diff --git a/docs/schemas/domaincommon.rng b/docs/schemas/domaincommon.rng index b9ac5df..3e406fc 100644 --- a/docs/schemas/domaincommon.rng +++ b/docs/schemas/domaincommon.rng @@ -1990,6 +1990,9 @@ <ref name="detect_zeroes"/> </optional> <optional> + <ref name="metadata_cache_size"/> + </optional> + <optional> <attribute name='queues'> <ref name="positiveInteger"/> </attribute> @@ -2090,6 +2093,14 @@ </choice> </attribute> </define> + <define name="metadata_cache_size"> + <attribute name='metadata_cache_size'> + <choice> + <value>default</value> + <value>maximum</value> + </choice> + </attribute> + </define> <define name="controller"> <element name="controller"> <optional> diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c index 237540b..b2185f8 100644 --- a/src/conf/domain_conf.c +++ b/src/conf/domain_conf.c @@ -885,6 +885,11 @@ VIR_ENUM_IMPL(virDomainDiskDetectZeroes, VIR_DOMAIN_DISK_DETECT_ZEROES_LAST, "on", "unmap") +VIR_ENUM_IMPL(virDomainDiskMetadataCacheSize, + VIR_DOMAIN_DISK_METADATA_CACHE_SIZE_LAST, + "default", + "maximum") + VIR_ENUM_IMPL(virDomainDiskMirrorState, VIR_DOMAIN_DISK_MIRROR_STATE_LAST, "none", "yes", @@ -9375,6 +9380,14 @@ virDomainDiskDefDriverParseXML(virDomainDiskDefPtr def, } VIR_FREE(tmp); + if ((tmp = virXMLPropString(cur, "metadata_cache_size")) && + (def->metadata_cache_size = virDomainDiskMetadataCacheSizeTypeFromString(tmp)) < 0) { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, + _("unknown driver metadata_cache_size value '%s'"), tmp); + goto cleanup; + } + VIR_FREE(tmp); + ret = 0; cleanup: @@ -23902,6 +23915,10 @@ virDomainDiskDefFormatDriver(virBufferPtr buf, if (disk->queues) virBufferAsprintf(&driverBuf, " queues='%u'", disk->queues); + if (disk->metadata_cache_size) + virBufferAsprintf(&driverBuf, " metadata_cache_size='%s'", + virDomainDiskMetadataCacheSizeTypeToString(disk->metadata_cache_size)); + virDomainVirtioOptionsFormat(&driverBuf, disk->virtio); ret = virXMLFormatElement(buf, "driver", &driverBuf, NULL); diff --git a/src/conf/domain_conf.h b/src/conf/domain_conf.h index e30a4b2..b155058 100644 --- a/src/conf/domain_conf.h +++ b/src/conf/domain_conf.h @@ -568,6 +568,13 @@ typedef enum { 
VIR_DOMAIN_DISK_DETECT_ZEROES_LAST } virDomainDiskDetectZeroes; +typedef enum { + VIR_DOMAIN_DISK_METADATA_CACHE_SIZE_DEFAULT = 0, + VIR_DOMAIN_DISK_METADATA_CACHE_SIZE_MAXIMUM, + + VIR_DOMAIN_DISK_METADATA_CACHE_SIZE_LAST +} virDomainDiskMetadataCacheSize; + typedef struct _virDomainBlockIoTuneInfo virDomainBlockIoTuneInfo; struct _virDomainBlockIoTuneInfo { unsigned long long total_bytes_sec; @@ -672,6 +679,7 @@ struct _virDomainDiskDef { int discard; /* enum virDomainDiskDiscard */ unsigned int iothread; /* unused = 0, > 0 specific thread # */ int detect_zeroes; /* enum virDomainDiskDetectZeroes */ + int metadata_cache_size; /* enum virDomainDiskMetadataCacheSize */ char *domain_name; /* backend domain name */ unsigned int queues; virDomainVirtioOptionsPtr virtio; @@ -3388,6 +3396,7 @@ VIR_ENUM_DECL(virDomainDeviceSGIO) VIR_ENUM_DECL(virDomainDiskTray) VIR_ENUM_DECL(virDomainDiskDiscard) VIR_ENUM_DECL(virDomainDiskDetectZeroes) +VIR_ENUM_DECL(virDomainDiskMetadataCacheSize) VIR_ENUM_DECL(virDomainDiskMirrorState) VIR_ENUM_DECL(virDomainController) VIR_ENUM_DECL(virDomainControllerModelPCI) diff --git a/tests/qemuxml2argvdata/disk-metadata_cache_size.xml b/tests/qemuxml2argvdata/disk-metadata_cache_size.xml new file mode 100644 index 0000000..8ac2599 --- /dev/null +++ b/tests/qemuxml2argvdata/disk-metadata_cache_size.xml @@ -0,0 +1,42 @@ +<domain type='qemu'> + <name>test</name> + <uuid>468404ad-d49c-40f2-9e14-02294f9c1be3</uuid> + <memory unit='KiB'>1048576</memory> + <currentMemory unit='KiB'>1048576</currentMemory> + <vcpu placement='static'>1</vcpu> + <os> + <type arch='x86_64' machine='pc-0.13'>hvm</type> + <boot dev='cdrom'/> + <boot dev='hd'/> + <bootmenu enable='yes'/> + </os> + <clock offset='utc'/> + <on_poweroff>destroy</on_poweroff> + <on_reboot>restart</on_reboot> + <on_crash>restart</on_crash> + <devices> + <emulator>/usr/bin/qemu-system-x86_64</emulator> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' metadata_cache_size='maximum'/> + <source file='/var/lib/libvirt/images/f14.img'/> + <target dev='vda' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x00' slot='0x04' function='0x0'/> + </disk> + <disk type='file' device='cdrom'> + <driver name='qemu' type='raw'/> + <source file='/var/lib/libvirt/Fedora-14-x86_64-Live-KDE.iso'/> + <target dev='hdc' bus='ide'/> + <readonly/> + <address type='drive' controller='0' bus='1' target='0' unit='0'/> + </disk> + <controller type='usb' index='0'/> + <controller type='virtio-serial' index='0'> + <address type='pci' domain='0x0000' bus='0x00' slot='0x06' function='0x0'/> + </controller> + <controller type='ide' index='0'/> + <controller type='pci' index='0' model='pci-root'/> + <input type='mouse' bus='ps2'/> + <input type='keyboard' bus='ps2'/> + <memballoon model='virtio'/> + </devices> +</domain> diff --git a/tests/qemuxml2xmloutdata/disk-metadata_cache_size.xml b/tests/qemuxml2xmloutdata/disk-metadata_cache_size.xml new file mode 100644 index 0000000..5fed22b --- /dev/null +++ b/tests/qemuxml2xmloutdata/disk-metadata_cache_size.xml @@ -0,0 +1,48 @@ +<domain type='qemu'> + <name>test</name> + <uuid>468404ad-d49c-40f2-9e14-02294f9c1be3</uuid> + <memory unit='KiB'>1048576</memory> + <currentMemory unit='KiB'>1048576</currentMemory> + <vcpu placement='static'>1</vcpu> + <os> + <type arch='x86_64' machine='pc-0.13'>hvm</type> + <boot dev='cdrom'/> + <boot dev='hd'/> + <bootmenu enable='yes'/> + </os> + <clock offset='utc'/> + <on_poweroff>destroy</on_poweroff> + <on_reboot>restart</on_reboot> + 
<on_crash>restart</on_crash> + <devices> + <emulator>/usr/bin/qemu-system-x86_64</emulator> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' metadata_cache_size='maximum'/> + <source file='/var/lib/libvirt/images/f14.img'/> + <target dev='vda' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x00' slot='0x04' function='0x0'/> + </disk> + <disk type='file' device='cdrom'> + <driver name='qemu' type='raw'/> + <source file='/var/lib/libvirt/Fedora-14-x86_64-Live-KDE.iso'/> + <target dev='hdc' bus='ide'/> + <readonly/> + <address type='drive' controller='0' bus='1' target='0' unit='0'/> + </disk> + <controller type='usb' index='0'> + <address type='pci' domain='0x0000' bus='0x00' slot='0x01' function='0x2'/> + </controller> + <controller type='virtio-serial' index='0'> + <address type='pci' domain='0x0000' bus='0x00' slot='0x06' function='0x0'/> + </controller> + <controller type='ide' index='0'> + <address type='pci' domain='0x0000' bus='0x00' slot='0x01' function='0x1'/> + </controller> + <controller type='pci' index='0' model='pci-root'/> + <input type='mouse' bus='ps2'/> + <input type='keyboard' bus='ps2'/> + <memballoon model='virtio'> + <address type='pci' domain='0x0000' bus='0x00' slot='0x03' function='0x0'/> + </memballoon> + </devices> +</domain> diff --git a/tests/qemuxml2xmltest.c b/tests/qemuxml2xmltest.c index 89640f6..c44e0fe 100644 --- a/tests/qemuxml2xmltest.c +++ b/tests/qemuxml2xmltest.c @@ -1235,6 +1235,8 @@ mymain(void) DO_TEST("riscv64-virt", QEMU_CAPS_DEVICE_VIRTIO_MMIO); + DO_TEST("disk-metadata_cache_size", NONE); + if (getenv("LIBVIRT_SKIP_CLEANUP") == NULL) virFileDeleteTree(fakerootdir); -- 1.8.3.1

On 11/8/18 8:02 AM, Nikolay Shirokovskiy wrote:
Signed-off-by: Nikolay Shirokovskiy <nshirokovskiy@virtuozzo.com> --- docs/formatdomain.html.in | 8 ++++ docs/schemas/domaincommon.rng | 11 +++++ src/conf/domain_conf.c | 17 ++++++++ src/conf/domain_conf.h | 9 ++++ .../qemuxml2argvdata/disk-metadata_cache_size.xml | 42 +++++++++++++++++++ .../disk-metadata_cache_size.xml | 48 ++++++++++++++++++++++ tests/qemuxml2xmltest.c | 2 + 7 files changed, 137 insertions(+) create mode 100644 tests/qemuxml2argvdata/disk-metadata_cache_size.xml create mode 100644 tests/qemuxml2xmloutdata/disk-metadata_cache_size.xml
<sigh> looks like a forgotten thread... It seems reviewer bandwidth is limited and won't get much better during the last couple weeks of the month when many if not most Red Hat employees are out to due company shutdown period. You need to add a few words to the commit message to describe what's being changed. Oh and you'll also need a "last" patch to docs/news.xml to describe the new feature.
diff --git a/docs/formatdomain.html.in b/docs/formatdomain.html.in index 269741a..1d186ab 100644 --- a/docs/formatdomain.html.in +++ b/docs/formatdomain.html.in @@ -3556,6 +3556,14 @@ virt queues for virtio-blk. (<span class="since">Since 3.9.0</span>) </li> <li> + The optional <code>metadata_cache_size</code> attribute specifies + metadata cache size policy, possible values are "default" and "maximum".
s/policy, possible/policy. Possible/
+ "default" leaves setting cache size to hypervisor, "maximum" makes
s/"default"/Using "default"/ s/to hypervisor,/to the hypervisor./ s/"maximum"/Using "maximum"
+ cache size large enough to keep all metadata, this will help if workload
"Using maximum assigns the largest value possible for the cache size. This ensures the entire disk cache remains in memory for faster I/O at the expense of utilizing more memory." [Editorial comment: it's really not 100% clear what all the tradeoffs someone is making here. The phrase "large enough" just sticks out, but since you use INT64_MAX in patch 3, I suppose that *is* the maximum. Still in some way indicating that this allows QEMU to grow the cache and keep everything in memory, but has the side effect that disks configured this way will cause guest memory requirements to grow albeit limited by the QEMU algorithms. It seems from my read the max is 32MB, so perhaps not a huge deal, but could be useful to note. Whether QEMU shrinks the cache when not in use wasn't 100% clear to me.] It's too bad it's not possible to utilize some dynamic value via qcow2_update_options_prepare. If dynamic adjustment were possible, then saving the value in the XML wouldn't be necessary - we could just allow dynamic adjustment similar to what I did for of a couple of IOThread params (see commit 11ceedcda and the patches before it).
+ needs access to whole disk all the time. (<span class="since">Since + 4.10.0, QEMU 3.1</span>)
This will be at least 5.0.0 now.
+ </li> + <li> For virtio disks, <a href="#elementsVirtio">Virtio-specific options</a> can also be set. (<span class="since">Since 3.5.0</span>) diff --git a/docs/schemas/domaincommon.rng b/docs/schemas/domaincommon.rng index b9ac5df..3e406fc 100644 --- a/docs/schemas/domaincommon.rng +++ b/docs/schemas/domaincommon.rng @@ -1990,6 +1990,9 @@ <ref name="detect_zeroes"/> </optional> <optional> + <ref name="metadata_cache_size"/> + </optional> + <optional> <attribute name='queues'> <ref name="positiveInteger"/> </attribute> @@ -2090,6 +2093,14 @@ </choice> </attribute> </define> + <define name="metadata_cache_size"> + <attribute name='metadata_cache_size'> + <choice> + <value>default</value> + <value>maximum</value>
I didn't go back and read the previous reviews nor was I present for the KVM Forum discussion on this, but default really means "minimum" I think. Even that just doesn't feel "right". There's 3 QEMU knobs, but this changes 1 knob allowing one direction. And yes, changing 3 knobs is also confusing. I "assume" that it's been determined that this one knob has the greatest effect on I/O performance...

Reading https://github.com/qemu/qemu/blob/master/docs/qcow2-cache.txt some phrases stick out:

1. "setting the right cache sizes is not a straightforward operation".
2. "In order to choose the cache sizes we need to know how they relate to the amount of allocated space."
3. The limit of the l2 cache size is "32 MB by default on Linux platforms (enough for full coverage of 256 GB images, with the default cluster size)".
4. "QEMU will not use more memory than needed to hold all of the image's L2 tables, regardless of this max. value."

So at least providing INT_MAX won't hurt, although given the math in qcow2_update_options_prepare:

    l2_cache_size /= l2_cache_entry_size;
    if (l2_cache_size < MIN_L2_CACHE_SIZE) {
        l2_cache_size = MIN_L2_CACHE_SIZE;
    }
    if (l2_cache_size > INT_MAX) {

wouldn't that mean we could pass up to "l2_cache_size * l2_cache_entry_size"?

Still using minimum/maximum for what amounts to a boolean operation is I think unnecessary. Rather than a list of values, wouldn't having something like "max_metadata_cache='yes'" be just as effective?

Alternatively, reading through the above document and thinking about how "useful" it could be to use a more specific value, why not go with "metadata_cache_size=N", where N is in MB and describes the size of the cache to be used. Being in MB ascribes to the need to be a power of 2 and quite frankly keeps a reasonable range assigned to the value. I'd find it hard to fathom a "large enough" disk would improve that much I/O performance in KB adjustments, but I haven't done any analysis of that theory either.

I think the above document also gives a "fair" example that could be "reworded" into the libvirt docs in order to help "size" things based on the default QEMU values we're not allowing to be changed and then showing the math what it means to have a 1MB cache, a 2MB cache, an 8MB, etc. vis-a-vis the size of the disk for which the cache is being created. For example, for a disk of up to 8G, the "default" cache would be XXX and setting a value of YYY would help I/O. Similarly for a 16GB, 64GB, etc. Essentially a bit of a guide - although that's starting to border on what should go into the wiki instead.
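To make that sizing math concrete, here is a rough back-of-the-envelope sketch (assuming the default 64 KiB cluster size and 8 bytes per L2 entry from qcow2-cache.txt; purely illustrative, not part of this series):

  #include <stdio.h>
  #include <stdint.h>

  int main(void)
  {
      /* qcow2 defaults per docs/qcow2-cache.txt: 64 KiB clusters, 8-byte L2 entries */
      const uint64_t cluster_size = 64 * 1024;
      const uint64_t l2_entry_size = 8;
      const uint64_t disk_gib[] = { 8, 16, 64, 256, 1024 };

      for (size_t i = 0; i < sizeof(disk_gib) / sizeof(disk_gib[0]); i++) {
          uint64_t disk_bytes = disk_gib[i] << 30;
          /* L2 cache needed for full coverage of the image */
          uint64_t l2_cache = disk_bytes / cluster_size * l2_entry_size;
          printf("%5llu GiB image -> %6llu KiB of L2 cache for full coverage\n",
                 (unsigned long long)disk_gib[i],
                 (unsigned long long)(l2_cache / 1024));
      }
      return 0;
  }

which reproduces the numbers from the QEMU document: about 1 MiB for an 8 GiB image and 32 MiB for a 256 GiB one.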
+ </choice> + </attribute> + </define> <define name="controller"> <element name="controller"> <optional> diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c index 237540b..b2185f8 100644 --- a/src/conf/domain_conf.c +++ b/src/conf/domain_conf.c @@ -885,6 +885,11 @@ VIR_ENUM_IMPL(virDomainDiskDetectZeroes, VIR_DOMAIN_DISK_DETECT_ZEROES_LAST, "on", "unmap")
+VIR_ENUM_IMPL(virDomainDiskMetadataCacheSize, + VIR_DOMAIN_DISK_METADATA_CACHE_SIZE_LAST, + "default", + "maximum") + VIR_ENUM_IMPL(virDomainDiskMirrorState, VIR_DOMAIN_DISK_MIRROR_STATE_LAST, "none", "yes", @@ -9375,6 +9380,14 @@ virDomainDiskDefDriverParseXML(virDomainDiskDefPtr def, } VIR_FREE(tmp);
+ if ((tmp = virXMLPropString(cur, "metadata_cache_size")) && + (def->metadata_cache_size = virDomainDiskMetadataCacheSizeTypeFromString(tmp)) < 0) { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, + _("unknown driver metadata_cache_size value '%s'"), tmp); + goto cleanup; + } + VIR_FREE(tmp); +
This would just use the YesNo type logic instead or read in a numeric value. BTW: Not sure I'd bother with worrying about checking the QEMU maximum as that could change and then we're stuck with a lower maximum check. Just pass along the value to QEMU and let it fail.
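As a sketch, the numeric variant would parse roughly like this (hypothetical semantics - the field would become an unsigned int holding MiB rather than an enum; not what the series implements):

  if ((tmp = virXMLPropString(cur, "metadata_cache_size")) &&
      virStrToLong_ui(tmp, NULL, 10, &def->metadata_cache_size) < 0) {
      virReportError(VIR_ERR_XML_ERROR,
                     _("invalid driver metadata_cache_size value '%s'"), tmp);
      goto cleanup;
  }
  VIR_FREE(tmp);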
ret = 0;
cleanup: @@ -23902,6 +23915,10 @@ virDomainDiskDefFormatDriver(virBufferPtr buf, if (disk->queues) virBufferAsprintf(&driverBuf, " queues='%u'", disk->queues);
+ if (disk->metadata_cache_size) + virBufferAsprintf(&driverBuf, " metadata_cache_size='%s'", + virDomainDiskMetadataCacheSizeTypeToString(disk->metadata_cache_size)); +
My personal crusade... metadata_cache_size > VIR_DOMAIN_DISK_METADATA_CACHE_SIZE_DEFAULT (yes, > 0). Of course this all depends on your final solution - a boolean would only be written if set and an int value only written if > 0 (since 0 would be the "optional" viewpoint).
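That is, spelled out with the explicit comparison (otherwise the same code as the hunk above):

  if (disk->metadata_cache_size > VIR_DOMAIN_DISK_METADATA_CACHE_SIZE_DEFAULT)
      virBufferAsprintf(&driverBuf, " metadata_cache_size='%s'",
                        virDomainDiskMetadataCacheSizeTypeToString(disk->metadata_cache_size));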
virDomainVirtioOptionsFormat(&driverBuf, disk->virtio);
ret = virXMLFormatElement(buf, "driver", &driverBuf, NULL); diff --git a/src/conf/domain_conf.h b/src/conf/domain_conf.h index e30a4b2..b155058 100644 --- a/src/conf/domain_conf.h +++ b/src/conf/domain_conf.h @@ -568,6 +568,13 @@ typedef enum { VIR_DOMAIN_DISK_DETECT_ZEROES_LAST } virDomainDiskDetectZeroes;
+typedef enum { + VIR_DOMAIN_DISK_METADATA_CACHE_SIZE_DEFAULT = 0, + VIR_DOMAIN_DISK_METADATA_CACHE_SIZE_MAXIMUM, + + VIR_DOMAIN_DISK_METADATA_CACHE_SIZE_LAST +} virDomainDiskMetadataCacheSize; + typedef struct _virDomainBlockIoTuneInfo virDomainBlockIoTuneInfo; struct _virDomainBlockIoTuneInfo { unsigned long long total_bytes_sec; @@ -672,6 +679,7 @@ struct _virDomainDiskDef { int discard; /* enum virDomainDiskDiscard */ unsigned int iothread; /* unused = 0, > 0 specific thread # */ int detect_zeroes; /* enum virDomainDiskDetectZeroes */ + int metadata_cache_size; /* enum virDomainDiskMetadataCacheSize */ char *domain_name; /* backend domain name */ unsigned int queues; virDomainVirtioOptionsPtr virtio; @@ -3388,6 +3396,7 @@ VIR_ENUM_DECL(virDomainDeviceSGIO) VIR_ENUM_DECL(virDomainDiskTray) VIR_ENUM_DECL(virDomainDiskDiscard) VIR_ENUM_DECL(virDomainDiskDetectZeroes) +VIR_ENUM_DECL(virDomainDiskMetadataCacheSize) VIR_ENUM_DECL(virDomainDiskMirrorState) VIR_ENUM_DECL(virDomainController) VIR_ENUM_DECL(virDomainControllerModelPCI) diff --git a/tests/qemuxml2argvdata/disk-metadata_cache_size.xml b/tests/qemuxml2argvdata/disk-metadata_cache_size.xml new file mode 100644 index 0000000..8ac2599 --- /dev/null +++ b/tests/qemuxml2argvdata/disk-metadata_cache_size.xml @@ -0,0 +1,42 @@ +<domain type='qemu'> + <name>test</name> + <uuid>468404ad-d49c-40f2-9e14-02294f9c1be3</uuid> + <memory unit='KiB'>1048576</memory> + <currentMemory unit='KiB'>1048576</currentMemory> + <vcpu placement='static'>1</vcpu> + <os> + <type arch='x86_64' machine='pc-0.13'>hvm</type>
Nothing more recent than pc-0.13 ;-) for this copy-pasta.
+ <boot dev='cdrom'/> + <boot dev='hd'/> + <bootmenu enable='yes'/> + </os> + <clock offset='utc'/> + <on_poweroff>destroy</on_poweroff> + <on_reboot>restart</on_reboot> + <on_crash>restart</on_crash> + <devices> + <emulator>/usr/bin/qemu-system-x86_64</emulator> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' metadata_cache_size='maximum'/> + <source file='/var/lib/libvirt/images/f14.img'/> + <target dev='vda' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x00' slot='0x04' function='0x0'/> + </disk> + <disk type='file' device='cdrom'> + <driver name='qemu' type='raw'/> + <source file='/var/lib/libvirt/Fedora-14-x86_64-Live-KDE.iso'/> + <target dev='hdc' bus='ide'/> + <readonly/> + <address type='drive' controller='0' bus='1' target='0' unit='0'/> + </disk>
This second disk isn't necessary

John
+ <controller type='usb' index='0'/> + <controller type='virtio-serial' index='0'> + <address type='pci' domain='0x0000' bus='0x00' slot='0x06' function='0x0'/> + </controller> + <controller type='ide' index='0'/> + <controller type='pci' index='0' model='pci-root'/> + <input type='mouse' bus='ps2'/> + <input type='keyboard' bus='ps2'/> + <memballoon model='virtio'/> + </devices> +</domain> diff --git a/tests/qemuxml2xmloutdata/disk-metadata_cache_size.xml b/tests/qemuxml2xmloutdata/disk-metadata_cache_size.xml new file mode 100644 index 0000000..5fed22b --- /dev/null +++ b/tests/qemuxml2xmloutdata/disk-metadata_cache_size.xml @@ -0,0 +1,48 @@ +<domain type='qemu'> + <name>test</name> + <uuid>468404ad-d49c-40f2-9e14-02294f9c1be3</uuid> + <memory unit='KiB'>1048576</memory> + <currentMemory unit='KiB'>1048576</currentMemory> + <vcpu placement='static'>1</vcpu> + <os> + <type arch='x86_64' machine='pc-0.13'>hvm</type> + <boot dev='cdrom'/> + <boot dev='hd'/> + <bootmenu enable='yes'/> + </os> + <clock offset='utc'/> + <on_poweroff>destroy</on_poweroff> + <on_reboot>restart</on_reboot> + <on_crash>restart</on_crash> + <devices> + <emulator>/usr/bin/qemu-system-x86_64</emulator> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' metadata_cache_size='maximum'/> + <source file='/var/lib/libvirt/images/f14.img'/> + <target dev='vda' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x00' slot='0x04' function='0x0'/> + </disk> + <disk type='file' device='cdrom'> + <driver name='qemu' type='raw'/> + <source file='/var/lib/libvirt/Fedora-14-x86_64-Live-KDE.iso'/> + <target dev='hdc' bus='ide'/> + <readonly/> + <address type='drive' controller='0' bus='1' target='0' unit='0'/> + </disk> + <controller type='usb' index='0'> + <address type='pci' domain='0x0000' bus='0x00' slot='0x01' function='0x2'/> + </controller> + <controller type='virtio-serial' index='0'> + <address type='pci' domain='0x0000' bus='0x00' slot='0x06' function='0x0'/> + </controller> + <controller type='ide' index='0'> + <address type='pci' domain='0x0000' bus='0x00' slot='0x01' function='0x1'/> + </controller> + <controller type='pci' index='0' model='pci-root'/> + <input type='mouse' bus='ps2'/> + <input type='keyboard' bus='ps2'/> + <memballoon model='virtio'> + <address type='pci' domain='0x0000' bus='0x00' slot='0x03' function='0x0'/> + </memballoon> + </devices> +</domain> diff --git a/tests/qemuxml2xmltest.c b/tests/qemuxml2xmltest.c index 89640f6..c44e0fe 100644 --- a/tests/qemuxml2xmltest.c +++ b/tests/qemuxml2xmltest.c @@ -1235,6 +1235,8 @@ mymain(void) DO_TEST("riscv64-virt", QEMU_CAPS_DEVICE_VIRTIO_MMIO);
+ DO_TEST("disk-metadata_cache_size", NONE); + if (getenv("LIBVIRT_SKIP_CLEANUP") == NULL) virFileDeleteTree(fakerootdir);

On 10.12.2018 19:56, John Ferlan wrote:
On 11/8/18 8:02 AM, Nikolay Shirokovskiy wrote:
Signed-off-by: Nikolay Shirokovskiy <nshirokovskiy@virtuozzo.com> --- docs/formatdomain.html.in | 8 ++++ docs/schemas/domaincommon.rng | 11 +++++ src/conf/domain_conf.c | 17 ++++++++ src/conf/domain_conf.h | 9 ++++ .../qemuxml2argvdata/disk-metadata_cache_size.xml | 42 +++++++++++++++++++ .../disk-metadata_cache_size.xml | 48 ++++++++++++++++++++++ tests/qemuxml2xmltest.c | 2 + 7 files changed, 137 insertions(+) create mode 100644 tests/qemuxml2argvdata/disk-metadata_cache_size.xml create mode 100644 tests/qemuxml2xmloutdata/disk-metadata_cache_size.xml
<sigh> looks like a forgotten thread... It seems reviewer bandwidth is limited and won't get much better during the last couple weeks of the month when many if not most Red Hat employees are out to due company shutdown period.
You need to add a few words to the commit message to describe what's being changed. Oh and you'll also need a "last" patch to docs/news.xml to describe the new feature.
diff --git a/docs/formatdomain.html.in b/docs/formatdomain.html.in index 269741a..1d186ab 100644 --- a/docs/formatdomain.html.in +++ b/docs/formatdomain.html.in @@ -3556,6 +3556,14 @@ virt queues for virtio-blk. (<span class="since">Since 3.9.0</span>) </li> <li> + The optional <code>metadata_cache_size</code> attribute specifies + metadata cache size policy, possible values are "default" and "maximum".
s/policy, possible/policy. Possible/
+ "default" leaves setting cache size to hypervisor, "maximum" makes
s/"default"/Using "default"/
s/to hypervisor,/to the hypervisor./
s/"maximum"/Using "maximum"
+ cache size large enough to keep all metadata, this will help if workload
"Using maximum assigns the largest value possible for the cache size. This ensures the entire disk cache remains in memory for faster I/O at the expense of utilizing more memory."
[Editorial comment: it's really not 100% clear what all the tradeoffs someone is making here. The phrase "large enough" just sticks out, but since you use INT64_MAX in patch 3, I suppose that *is* the maximum. Still in some way indicating that this allows QEMU to grow the cache and keep everything in memory, but has the side effect that disks configured this way will cause guest memory requirements to grow albeit limited by the QEMU algorithms. It seems from my read the max is 32MB, so perhaps not a huge deal, but could be useful to note. Whether QEMU shrinks the cache when not in use wasn't 100% clear to me.]
It's too bad it's not possible to utilize some dynamic value via qcow2_update_options_prepare. If dynamic adjustment were possible, then saving the value in the XML wouldn't be necessary - we could just allow dynamic adjustment similar to what I did for a couple of IOThread params (see commit 11ceedcda and the patches before it).
+ needs access to whole disk all the time. (<span class="since">Since + 4.10.0, QEMU 3.1</span>)
This will be at least 5.0.0 now.
+ </li> + <li> For virtio disks, <a href="#elementsVirtio">Virtio-specific options</a> can also be set. (<span class="since">Since 3.5.0</span>) diff --git a/docs/schemas/domaincommon.rng b/docs/schemas/domaincommon.rng index b9ac5df..3e406fc 100644 --- a/docs/schemas/domaincommon.rng +++ b/docs/schemas/domaincommon.rng @@ -1990,6 +1990,9 @@ <ref name="detect_zeroes"/> </optional> <optional> + <ref name="metadata_cache_size"/> + </optional> + <optional> <attribute name='queues'> <ref name="positiveInteger"/> </attribute> @@ -2090,6 +2093,14 @@ </choice> </attribute> </define> + <define name="metadata_cache_size"> + <attribute name='metadata_cache_size'> + <choice> + <value>default</value> + <value>maximum</value>
I didn't go back and read the previous reviews nor was I present for the KVM Forum discussion on this, but default really means "minimum" I think. Even that just doesn't feel "right". There's 3 QEMU knobs, but this changes 1 knob allowing one direction. And yes, changing 3 knobs is also confusing. I "assume" that it's been determined that this one knob has the greatest effect on I/O performance...
Reading https://github.com/qemu/qemu/blob/master/docs/qcow2-cache.txt some phrases stick out:
1. "setting the right cache sizes is not a straightforward operation". 2. "In order to choose the cache sizes we need to know how they relate to the amount of allocated space." 3. The limit of the l2 cache size is "32 MB by default on Linux platforms (enough for full coverage of 256 GB images, with the default cluster size)". 4. "QEMU will not use more memory than needed to hold all of the image's L2 tables, regardless of this max. value."
So at least providing INT_MAX won't hurt, although given the math in qcow2_update_options_prepare:
    l2_cache_size /= l2_cache_entry_size;
    if (l2_cache_size < MIN_L2_CACHE_SIZE) {
        l2_cache_size = MIN_L2_CACHE_SIZE;
    }
    if (l2_cache_size > INT_MAX) {
wouldn't that mean we could pass up to "l2_cache_size * l2_cache_entry_size"?
You mean up to INT_MAX / l2_cache_entry_size? No, because there is also a limitation that comes from read_cache_sizes, so the cache will be limited to the maximum the disk size needs.
Still using minimum/maximum for what amounts to a boolean operation is I think unnecessary. Rather than a list of values, wouldn't having something like "max_metadata_cache='yes'" be just as effective?
Well, introducing the metadata_cache_size enum is what was agreed on at the KVM Forum...
Alternatively, reading through the above document and thinking about how "useful" it could be to use a more specific value, why not go with "metadata_cache_size=N", where N is in MB and describes the size of the cache to be used. Being in MB ascribes to the need to be a power of 2 and quite frankly keeps a reasonable range assigned to the value. I'd find it hard to fathom a "large enough" disk would improve that much I/O performance in KB adjustments, but I haven't done any analysis of that theory either.
There is a lot of discussion of this cache parameter on this list and in bugzilla (sorry for not providing links) that boils down to: one does not know how to set it properly, so having an option that is in MB is hard to use. On the other hand there are cases (synthetic tests, large databases) when the default cache size is clearly not big enough, so in the course of discussion it was decided to have, besides the default cache size, also a cache size that covers the whole disk so IO performance will not degrade whatever the guest does. Thus this 'maximum' setting. 'default' is just an explicit default (the implicit one is the omitted metadata_cache_size option, of course).

Also, having an enum instead of a bool is a bit future proof )

Nikolay
I think the above document also gives a "fair" example that could be "reworded" into the libvirt docs in order to help "size" things based on the default QEMU values we're not allowing to be changed and then showing the math what it means to have a 1MB cache, a 2MB cache, an 8MB, etc. vis-a-vis the size of the disk for which the cache is being created. For example, for a disk of up to 8G, the "default" cache would be XXX and setting a value of YYY would help I/O. Similarly for a 16GB, 64GB, etc. Essentially a bit of a guide - although that's starting to border on what should go into the wiki instead.
+ </choice> + </attribute> + </define> <define name="controller"> <element name="controller"> <optional> diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c index 237540b..b2185f8 100644 --- a/src/conf/domain_conf.c +++ b/src/conf/domain_conf.c @@ -885,6 +885,11 @@ VIR_ENUM_IMPL(virDomainDiskDetectZeroes, VIR_DOMAIN_DISK_DETECT_ZEROES_LAST, "on", "unmap")
+VIR_ENUM_IMPL(virDomainDiskMetadataCacheSize, + VIR_DOMAIN_DISK_METADATA_CACHE_SIZE_LAST, + "default", + "maximum") + VIR_ENUM_IMPL(virDomainDiskMirrorState, VIR_DOMAIN_DISK_MIRROR_STATE_LAST, "none", "yes", @@ -9375,6 +9380,14 @@ virDomainDiskDefDriverParseXML(virDomainDiskDefPtr def, } VIR_FREE(tmp);
+ if ((tmp = virXMLPropString(cur, "metadata_cache_size")) && + (def->metadata_cache_size = virDomainDiskMetadataCacheSizeTypeFromString(tmp)) < 0) { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, + _("unknown driver metadata_cache_size value '%s'"), tmp); + goto cleanup; + } + VIR_FREE(tmp); +
This would just use the YesNo type logic instead or read in a numeric value. BTW: Not sure I'd bother with worrying about checking the QEMU maximum as that could change and then we're stuck with a lower maximum check. Just pass along the value to QEMU and let it fail.
ret = 0;
cleanup: @@ -23902,6 +23915,10 @@ virDomainDiskDefFormatDriver(virBufferPtr buf, if (disk->queues) virBufferAsprintf(&driverBuf, " queues='%u'", disk->queues);
+ if (disk->metadata_cache_size) + virBufferAsprintf(&driverBuf, " metadata_cache_size='%s'", + virDomainDiskMetadataCacheSizeTypeToString(disk->metadata_cache_size)); +
My personal crusade... metadata_cache_size > VIR_DOMAIN_DISK_METADATA_CACHE_SIZE_DEFAULT (yes, > 0).
Of course this all depends on your final solution - a boolean would only be written if set and an int value only written if > 0 (since 0 would be the "optional" viewpoint).
virDomainVirtioOptionsFormat(&driverBuf, disk->virtio);
ret = virXMLFormatElement(buf, "driver", &driverBuf, NULL); diff --git a/src/conf/domain_conf.h b/src/conf/domain_conf.h index e30a4b2..b155058 100644 --- a/src/conf/domain_conf.h +++ b/src/conf/domain_conf.h @@ -568,6 +568,13 @@ typedef enum { VIR_DOMAIN_DISK_DETECT_ZEROES_LAST } virDomainDiskDetectZeroes;
+typedef enum { + VIR_DOMAIN_DISK_METADATA_CACHE_SIZE_DEFAULT = 0, + VIR_DOMAIN_DISK_METADATA_CACHE_SIZE_MAXIMUM, + + VIR_DOMAIN_DISK_METADATA_CACHE_SIZE_LAST +} virDomainDiskMetadataCacheSize; + typedef struct _virDomainBlockIoTuneInfo virDomainBlockIoTuneInfo; struct _virDomainBlockIoTuneInfo { unsigned long long total_bytes_sec; @@ -672,6 +679,7 @@ struct _virDomainDiskDef { int discard; /* enum virDomainDiskDiscard */ unsigned int iothread; /* unused = 0, > 0 specific thread # */ int detect_zeroes; /* enum virDomainDiskDetectZeroes */ + int metadata_cache_size; /* enum virDomainDiskMetadataCacheSize */ char *domain_name; /* backend domain name */ unsigned int queues; virDomainVirtioOptionsPtr virtio; @@ -3388,6 +3396,7 @@ VIR_ENUM_DECL(virDomainDeviceSGIO) VIR_ENUM_DECL(virDomainDiskTray) VIR_ENUM_DECL(virDomainDiskDiscard) VIR_ENUM_DECL(virDomainDiskDetectZeroes) +VIR_ENUM_DECL(virDomainDiskMetadataCacheSize) VIR_ENUM_DECL(virDomainDiskMirrorState) VIR_ENUM_DECL(virDomainController) VIR_ENUM_DECL(virDomainControllerModelPCI) diff --git a/tests/qemuxml2argvdata/disk-metadata_cache_size.xml b/tests/qemuxml2argvdata/disk-metadata_cache_size.xml new file mode 100644 index 0000000..8ac2599 --- /dev/null +++ b/tests/qemuxml2argvdata/disk-metadata_cache_size.xml @@ -0,0 +1,42 @@ +<domain type='qemu'> + <name>test</name> + <uuid>468404ad-d49c-40f2-9e14-02294f9c1be3</uuid> + <memory unit='KiB'>1048576</memory> + <currentMemory unit='KiB'>1048576</currentMemory> + <vcpu placement='static'>1</vcpu> + <os> + <type arch='x86_64' machine='pc-0.13'>hvm</type>
Nothing more recent than pc-0.13 ;-) for this copy-pasta.
+ <boot dev='cdrom'/> + <boot dev='hd'/> + <bootmenu enable='yes'/> + </os> + <clock offset='utc'/> + <on_poweroff>destroy</on_poweroff> + <on_reboot>restart</on_reboot> + <on_crash>restart</on_crash> + <devices> + <emulator>/usr/bin/qemu-system-x86_64</emulator> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' metadata_cache_size='maximum'/> + <source file='/var/lib/libvirt/images/f14.img'/> + <target dev='vda' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x00' slot='0x04' function='0x0'/> + </disk> + <disk type='file' device='cdrom'> + <driver name='qemu' type='raw'/> + <source file='/var/lib/libvirt/Fedora-14-x86_64-Live-KDE.iso'/> + <target dev='hdc' bus='ide'/> + <readonly/> + <address type='drive' controller='0' bus='1' target='0' unit='0'/> + </disk>
This second disk isn't necessary
John
+ <controller type='usb' index='0'/> + <controller type='virtio-serial' index='0'> + <address type='pci' domain='0x0000' bus='0x00' slot='0x06' function='0x0'/> + </controller> + <controller type='ide' index='0'/> + <controller type='pci' index='0' model='pci-root'/> + <input type='mouse' bus='ps2'/> + <input type='keyboard' bus='ps2'/> + <memballoon model='virtio'/> + </devices> +</domain> diff --git a/tests/qemuxml2xmloutdata/disk-metadata_cache_size.xml b/tests/qemuxml2xmloutdata/disk-metadata_cache_size.xml new file mode 100644 index 0000000..5fed22b --- /dev/null +++ b/tests/qemuxml2xmloutdata/disk-metadata_cache_size.xml @@ -0,0 +1,48 @@ +<domain type='qemu'> + <name>test</name> + <uuid>468404ad-d49c-40f2-9e14-02294f9c1be3</uuid> + <memory unit='KiB'>1048576</memory> + <currentMemory unit='KiB'>1048576</currentMemory> + <vcpu placement='static'>1</vcpu> + <os> + <type arch='x86_64' machine='pc-0.13'>hvm</type> + <boot dev='cdrom'/> + <boot dev='hd'/> + <bootmenu enable='yes'/> + </os> + <clock offset='utc'/> + <on_poweroff>destroy</on_poweroff> + <on_reboot>restart</on_reboot> + <on_crash>restart</on_crash> + <devices> + <emulator>/usr/bin/qemu-system-x86_64</emulator> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' metadata_cache_size='maximum'/> + <source file='/var/lib/libvirt/images/f14.img'/> + <target dev='vda' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x00' slot='0x04' function='0x0'/> + </disk> + <disk type='file' device='cdrom'> + <driver name='qemu' type='raw'/> + <source file='/var/lib/libvirt/Fedora-14-x86_64-Live-KDE.iso'/> + <target dev='hdc' bus='ide'/> + <readonly/> + <address type='drive' controller='0' bus='1' target='0' unit='0'/> + </disk> + <controller type='usb' index='0'> + <address type='pci' domain='0x0000' bus='0x00' slot='0x01' function='0x2'/> + </controller> + <controller type='virtio-serial' index='0'> + <address type='pci' domain='0x0000' bus='0x00' slot='0x06' function='0x0'/> + </controller> + <controller type='ide' index='0'> + <address type='pci' domain='0x0000' bus='0x00' slot='0x01' function='0x1'/> + </controller> + <controller type='pci' index='0' model='pci-root'/> + <input type='mouse' bus='ps2'/> + <input type='keyboard' bus='ps2'/> + <memballoon model='virtio'> + <address type='pci' domain='0x0000' bus='0x00' slot='0x03' function='0x0'/> + </memballoon> + </devices> +</domain> diff --git a/tests/qemuxml2xmltest.c b/tests/qemuxml2xmltest.c index 89640f6..c44e0fe 100644 --- a/tests/qemuxml2xmltest.c +++ b/tests/qemuxml2xmltest.c @@ -1235,6 +1235,8 @@ mymain(void) DO_TEST("riscv64-virt", QEMU_CAPS_DEVICE_VIRTIO_MMIO);
+ DO_TEST("disk-metadata_cache_size", NONE); + if (getenv("LIBVIRT_SKIP_CLEANUP") == NULL) virFileDeleteTree(fakerootdir);

On 12/11/18 5:22 AM, Nikolay Shirokovskiy wrote:
On 10.12.2018 19:56, John Ferlan wrote:
On 11/8/18 8:02 AM, Nikolay Shirokovskiy wrote:
Signed-off-by: Nikolay Shirokovskiy <nshirokovskiy@virtuozzo.com> --- docs/formatdomain.html.in | 8 ++++ docs/schemas/domaincommon.rng | 11 +++++ src/conf/domain_conf.c | 17 ++++++++ src/conf/domain_conf.h | 9 ++++ .../qemuxml2argvdata/disk-metadata_cache_size.xml | 42 +++++++++++++++++++ .../disk-metadata_cache_size.xml | 48 ++++++++++++++++++++++ tests/qemuxml2xmltest.c | 2 + 7 files changed, 137 insertions(+) create mode 100644 tests/qemuxml2argvdata/disk-metadata_cache_size.xml create mode 100644 tests/qemuxml2xmloutdata/disk-metadata_cache_size.xml
<sigh> looks like a forgotten thread... It seems reviewer bandwidth is limited and won't get much better during the last couple weeks of the month when many if not most Red Hat employees are out to due company shutdown period.
You need to add a few words to the commit message to describe what's being changed. Oh and you'll also need a "last" patch to docs/news.xml to describe the new feature.
diff --git a/docs/formatdomain.html.in b/docs/formatdomain.html.in index 269741a..1d186ab 100644 --- a/docs/formatdomain.html.in +++ b/docs/formatdomain.html.in @@ -3556,6 +3556,14 @@ virt queues for virtio-blk. (<span class="since">Since 3.9.0</span>) </li> <li> + The optional <code>metadata_cache_size</code> attribute specifies + metadata cache size policy, possible values are "default" and "maximum".
s/policy, possible/policy. Possible/
+ "default" leaves setting cache size to hypervisor, "maximum" makes
s/"default"/Using "default"/
s/to hypervisor,/to the hypervisor./
s/"maximum"/Using "maximum"
+ cache size large enough to keep all metadata, this will help if workload
"Using maximum assigns the largest value possible for the cache size. This ensures the entire disk cache remains in memory for faster I/O at the expense of utilizing more memory."
[Editorial comment: it's really not 100% clear what all the tradeoffs someone is making here. The phrase "large enough" just sticks out, but since you use INT64_MAX in patch 3, I suppose that *is* the maximum. Still in some way indicating that this allows QEMU to grow the cache and keep everything in memory, but has the side effect that disks configured this way will cause guest memory requirements to grow albeit limited by the QEMU algorithms. It seems from my read the max is 32MB, so perhaps not a huge deal, but could be useful to note. Whether QEMU shrinks the cache when not in use wasn't 100% clear to me.]
It's too bad it's not possible to utilize some dynamic value via qcow2_update_options_prepare. If dynamic adjustment were possible, then saving the value in the XML wouldn't be necessary - we could just allow dynamic adjustment similar to what I did for a couple of IOThread params (see commit 11ceedcda and the patches before it).
+ needs access to whole disk all the time. (<span class="since">Since + 4.10.0, QEMU 3.1</span>)
This will be at least 5.0.0 now.
+ </li> + <li> For virtio disks, <a href="#elementsVirtio">Virtio-specific options</a> can also be set. (<span class="since">Since 3.5.0</span>) diff --git a/docs/schemas/domaincommon.rng b/docs/schemas/domaincommon.rng index b9ac5df..3e406fc 100644 --- a/docs/schemas/domaincommon.rng +++ b/docs/schemas/domaincommon.rng @@ -1990,6 +1990,9 @@ <ref name="detect_zeroes"/> </optional> <optional> + <ref name="metadata_cache_size"/> + </optional> + <optional> <attribute name='queues'> <ref name="positiveInteger"/> </attribute> @@ -2090,6 +2093,14 @@ </choice> </attribute> </define> + <define name="metadata_cache_size"> + <attribute name='metadata_cache_size'> + <choice> + <value>default</value> + <value>maximum</value>
I didn't go back and read the previous reviews nor was I present for the KVM Forum discussion on this, but default really means "minimum" I think. Even that just doesn't feel "right". There's 3 QEMU knobs, but this changes 1 knob allowing one direction. And yes, changing 3 knobs is also confusing. I "assume" that it's been determined that this one knob has the greatest effect on I/O performance...
Reading https://github.com/qemu/qemu/blob/master/docs/qcow2-cache.txt some phrases stick out:
1. "setting the right cache sizes is not a straightforward operation". 2. "In order to choose the cache sizes we need to know how they relate to the amount of allocated space." 3. The limit of the l2 cache size is "32 MB by default on Linux platforms (enough for full coverage of 256 GB images, with the default cluster size)". 4. "QEMU will not use more memory than needed to hold all of the image's L2 tables, regardless of this max. value."
So at least providing INT_MAX won't hurt, although given the math in qcow2_update_options_prepare:
    l2_cache_size /= l2_cache_entry_size;
    if (l2_cache_size < MIN_L2_CACHE_SIZE) {
        l2_cache_size = MIN_L2_CACHE_SIZE;
    }
    if (l2_cache_size > INT_MAX) {
wouldn't that mean we could pass up to "l2_cache_size * l2_cache_entry_size"?
You mean up to INT_MAX / l2_cache_entry_size? No, because there is also a limitation that comes from read_cache_sizes, so the cache will be limited to the maximum the disk size needs.
So many dependencies... So much math to look at.
Still using minimum/maximum for what amounts to a boolean operation is I think unnecessary. Rather than a list of values, wouldn't having something like "max_metadata_cache='yes'" be just as effective?
Well, introducing the metadata_cache_size enum is what was agreed on at the KVM Forum...
And yet providing maximum runs into immediate issues because of bugs that are fixed in 3.1. Devil's advocate says - why not patch QEMU to add something that causes the allocation algorithm to use the maximum values for great I/O performance - that way mgmt apps don't have to figure out the math, and if the QEMU algorithm for this ever changes, guest XML settings won't have a negative impact. The mgmt app just has to signify its desire to use algorithms that "can" improve I/O performance significantly.
Alternatively, reading through the above document and thinking about how "useful" it could be to use a more specific value, why not go with "metadata_cache_size=N", where N is in MB and describes the size of the cache to be used. Being in MB ascribes to the need to be a power of 2 and quite frankly keeps a reasonable range assigned to the value. I'd find it hard to fathom a "large enough" disk would improve that much I/O performance in KB adjustments, but I haven't done any analysis of that theory either.
There is a lot of discussion of this cache parameter on this list and in bugzilla (sorry for not providing links) that boils down to: one does not know how to set it properly, so having an option that is in MB is hard to use. On
I know there's lots of discussion on this - you did list a few series in your first cover. Using MB was merely a reaction to reading the document and seeing it says "The maximum L2 cache size is 32 MB by default on Linux platforms". So a value between 1 and 32 could be used if someone wants to go through the pain of figuring out the nuances of the algorithm and the difference in performance in their environment for each step. Not providing a "set" number, though, satisfies the desire to avoid some number being baked into the guest XML permanently.
the other hand there are cases (synthetic tests, large databases) when the default cache size is clearly not big enough, so in the course of discussion it was decided to have, besides the default cache size, also a cache size that covers the whole disk so IO performance will not degrade whatever the guest does.
I understand the need... Many previous series have shown that!
Thus this 'maximum' setting. 'default' is just an explicit default (the implicit one is the omitted metadata_cache_size option, of course).
The chosen implementation is boolean - all or default. Was there discussion that also noted that perhaps we should add a 3rd or 4th enum or string to describe some condition between default and maximum ("quarter", "half", "threefourths")? In the long run it doesn't matter, but as the implementation is posted it is a boolean operation. I can live with "minimum" and "maximum", but feel it's important to note the obvious especially since trying to describe anything between could take the PHD (piled higher and deeper) of doc writing abilities.
Also, having an enum instead of a bool is a bit future proof )
In my mind only if the choice is at some time in the future to add something between default and maximum. I honestly don't see that happening, but if the group decision at KVM Forum was to go with some sort of enum - I can live with that. Perhaps something was discussed there that just isn't obvious with my short term tunnel vision.

John
Nikolay
I think the above document also gives a "fair" example that could be "reworded" into the libvirt docs in order to help "size" things based on the default QEMU values we're not allowing to be changed and then showing the math what it means to have a 1MB cache, a 2MB cache, an 8MB, etc. vis-a-vis the size of the disk for which the cache is being created. For example, for a disk of up to 8G, the "default" cache would be XXX and setting a value of YYY would help I/O. Similarly for a 16GB, 64GB, etc. Essentially a bit of a guide - although that's starting to border on what should go into the wiki instead.
+ </choice> + </attribute> + </define> <define name="controller"> <element name="controller"> <optional> diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c index 237540b..b2185f8 100644 --- a/src/conf/domain_conf.c +++ b/src/conf/domain_conf.c @@ -885,6 +885,11 @@ VIR_ENUM_IMPL(virDomainDiskDetectZeroes, VIR_DOMAIN_DISK_DETECT_ZEROES_LAST, "on", "unmap")
+VIR_ENUM_IMPL(virDomainDiskMetadataCacheSize, + VIR_DOMAIN_DISK_METADATA_CACHE_SIZE_LAST, + "default", + "maximum") + VIR_ENUM_IMPL(virDomainDiskMirrorState, VIR_DOMAIN_DISK_MIRROR_STATE_LAST, "none", "yes", @@ -9375,6 +9380,14 @@ virDomainDiskDefDriverParseXML(virDomainDiskDefPtr def, } VIR_FREE(tmp);
+ if ((tmp = virXMLPropString(cur, "metadata_cache_size")) && + (def->metadata_cache_size = virDomainDiskMetadataCacheSizeTypeFromString(tmp)) < 0) { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, + _("unknown driver metadata_cache_size value '%s'"), tmp); + goto cleanup; + } + VIR_FREE(tmp); +
This would just use the YesNo type logic instead or read in a numeric value. BTW: Not sure I'd bother with worrying about checking the QEMU maximum as that could change and then we're stuck with a lower maximum check. Just pass along the value to QEMU and let it fail.
ret = 0;
cleanup: @@ -23902,6 +23915,10 @@ virDomainDiskDefFormatDriver(virBufferPtr buf, if (disk->queues) virBufferAsprintf(&driverBuf, " queues='%u'", disk->queues);
+ if (disk->metadata_cache_size) + virBufferAsprintf(&driverBuf, " metadata_cache_size='%s'", + virDomainDiskMetadataCacheSizeTypeToString(disk->metadata_cache_size)); +
My personal crusade... metadata_cache_size > VIR_DOMAIN_DISK_METADATA_CACHE_SIZE_DEFAULT (yes, > 0).
Of course this all depends on your final solution - a boolean would only be written if set and an int value only written if > 0 (since 0 would be the "optional" viewpoint).
virDomainVirtioOptionsFormat(&driverBuf, disk->virtio);
ret = virXMLFormatElement(buf, "driver", &driverBuf, NULL); diff --git a/src/conf/domain_conf.h b/src/conf/domain_conf.h index e30a4b2..b155058 100644 --- a/src/conf/domain_conf.h +++ b/src/conf/domain_conf.h @@ -568,6 +568,13 @@ typedef enum { VIR_DOMAIN_DISK_DETECT_ZEROES_LAST } virDomainDiskDetectZeroes;
+typedef enum { + VIR_DOMAIN_DISK_METADATA_CACHE_SIZE_DEFAULT = 0, + VIR_DOMAIN_DISK_METADATA_CACHE_SIZE_MAXIMUM, + + VIR_DOMAIN_DISK_METADATA_CACHE_SIZE_LAST +} virDomainDiskMetadataCacheSize; + typedef struct _virDomainBlockIoTuneInfo virDomainBlockIoTuneInfo; struct _virDomainBlockIoTuneInfo { unsigned long long total_bytes_sec; @@ -672,6 +679,7 @@ struct _virDomainDiskDef { int discard; /* enum virDomainDiskDiscard */ unsigned int iothread; /* unused = 0, > 0 specific thread # */ int detect_zeroes; /* enum virDomainDiskDetectZeroes */ + int metadata_cache_size; /* enum virDomainDiskMetadataCacheSize */ char *domain_name; /* backend domain name */ unsigned int queues; virDomainVirtioOptionsPtr virtio; @@ -3388,6 +3396,7 @@ VIR_ENUM_DECL(virDomainDeviceSGIO) VIR_ENUM_DECL(virDomainDiskTray) VIR_ENUM_DECL(virDomainDiskDiscard) VIR_ENUM_DECL(virDomainDiskDetectZeroes) +VIR_ENUM_DECL(virDomainDiskMetadataCacheSize) VIR_ENUM_DECL(virDomainDiskMirrorState) VIR_ENUM_DECL(virDomainController) VIR_ENUM_DECL(virDomainControllerModelPCI) diff --git a/tests/qemuxml2argvdata/disk-metadata_cache_size.xml b/tests/qemuxml2argvdata/disk-metadata_cache_size.xml new file mode 100644 index 0000000..8ac2599 --- /dev/null +++ b/tests/qemuxml2argvdata/disk-metadata_cache_size.xml @@ -0,0 +1,42 @@ +<domain type='qemu'> + <name>test</name> + <uuid>468404ad-d49c-40f2-9e14-02294f9c1be3</uuid> + <memory unit='KiB'>1048576</memory> + <currentMemory unit='KiB'>1048576</currentMemory> + <vcpu placement='static'>1</vcpu> + <os> + <type arch='x86_64' machine='pc-0.13'>hvm</type>
Nothing more recent than pc-0.13 ;-) for this copy-pasta.
+ <boot dev='cdrom'/> + <boot dev='hd'/> + <bootmenu enable='yes'/> + </os> + <clock offset='utc'/> + <on_poweroff>destroy</on_poweroff> + <on_reboot>restart</on_reboot> + <on_crash>restart</on_crash> + <devices> + <emulator>/usr/bin/qemu-system-x86_64</emulator> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' metadata_cache_size='maximum'/> + <source file='/var/lib/libvirt/images/f14.img'/> + <target dev='vda' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x00' slot='0x04' function='0x0'/> + </disk> + <disk type='file' device='cdrom'> + <driver name='qemu' type='raw'/> + <source file='/var/lib/libvirt/Fedora-14-x86_64-Live-KDE.iso'/> + <target dev='hdc' bus='ide'/> + <readonly/> + <address type='drive' controller='0' bus='1' target='0' unit='0'/> + </disk>
This second disk isn't necessary
John
+ <controller type='usb' index='0'/> + <controller type='virtio-serial' index='0'> + <address type='pci' domain='0x0000' bus='0x00' slot='0x06' function='0x0'/> + </controller> + <controller type='ide' index='0'/> + <controller type='pci' index='0' model='pci-root'/> + <input type='mouse' bus='ps2'/> + <input type='keyboard' bus='ps2'/> + <memballoon model='virtio'/> + </devices> +</domain> diff --git a/tests/qemuxml2xmloutdata/disk-metadata_cache_size.xml b/tests/qemuxml2xmloutdata/disk-metadata_cache_size.xml new file mode 100644 index 0000000..5fed22b --- /dev/null +++ b/tests/qemuxml2xmloutdata/disk-metadata_cache_size.xml @@ -0,0 +1,48 @@ +<domain type='qemu'> + <name>test</name> + <uuid>468404ad-d49c-40f2-9e14-02294f9c1be3</uuid> + <memory unit='KiB'>1048576</memory> + <currentMemory unit='KiB'>1048576</currentMemory> + <vcpu placement='static'>1</vcpu> + <os> + <type arch='x86_64' machine='pc-0.13'>hvm</type> + <boot dev='cdrom'/> + <boot dev='hd'/> + <bootmenu enable='yes'/> + </os> + <clock offset='utc'/> + <on_poweroff>destroy</on_poweroff> + <on_reboot>restart</on_reboot> + <on_crash>restart</on_crash> + <devices> + <emulator>/usr/bin/qemu-system-x86_64</emulator> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' metadata_cache_size='maximum'/> + <source file='/var/lib/libvirt/images/f14.img'/> + <target dev='vda' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x00' slot='0x04' function='0x0'/> + </disk> + <disk type='file' device='cdrom'> + <driver name='qemu' type='raw'/> + <source file='/var/lib/libvirt/Fedora-14-x86_64-Live-KDE.iso'/> + <target dev='hdc' bus='ide'/> + <readonly/> + <address type='drive' controller='0' bus='1' target='0' unit='0'/> + </disk> + <controller type='usb' index='0'> + <address type='pci' domain='0x0000' bus='0x00' slot='0x01' function='0x2'/> + </controller> + <controller type='virtio-serial' index='0'> + <address type='pci' domain='0x0000' bus='0x00' slot='0x06' function='0x0'/> + </controller> + <controller type='ide' index='0'> + <address type='pci' domain='0x0000' bus='0x00' slot='0x01' function='0x1'/> + </controller> + <controller type='pci' index='0' model='pci-root'/> + <input type='mouse' bus='ps2'/> + <input type='keyboard' bus='ps2'/> + <memballoon model='virtio'> + <address type='pci' domain='0x0000' bus='0x00' slot='0x03' function='0x0'/> + </memballoon> + </devices> +</domain> diff --git a/tests/qemuxml2xmltest.c b/tests/qemuxml2xmltest.c index 89640f6..c44e0fe 100644 --- a/tests/qemuxml2xmltest.c +++ b/tests/qemuxml2xmltest.c @@ -1235,6 +1235,8 @@ mymain(void) DO_TEST("riscv64-virt", QEMU_CAPS_DEVICE_VIRTIO_MMIO);
+ DO_TEST("disk-metadata_cache_size", NONE); + if (getenv("LIBVIRT_SKIP_CLEANUP") == NULL) virFileDeleteTree(fakerootdir);

On 11.12.2018 19:34, John Ferlan wrote:
On 12/11/18 5:22 AM, Nikolay Shirokovskiy wrote:
On 10.12.2018 19:56, John Ferlan wrote:
On 11/8/18 8:02 AM, Nikolay Shirokovskiy wrote:
Signed-off-by: Nikolay Shirokovskiy <nshirokovskiy@virtuozzo.com> --- docs/formatdomain.html.in | 8 ++++ docs/schemas/domaincommon.rng | 11 +++++ src/conf/domain_conf.c | 17 ++++++++ src/conf/domain_conf.h | 9 ++++ .../qemuxml2argvdata/disk-metadata_cache_size.xml | 42 +++++++++++++++++++ .../disk-metadata_cache_size.xml | 48 ++++++++++++++++++++++ tests/qemuxml2xmltest.c | 2 + 7 files changed, 137 insertions(+) create mode 100644 tests/qemuxml2argvdata/disk-metadata_cache_size.xml create mode 100644 tests/qemuxml2xmloutdata/disk-metadata_cache_size.xml
<sigh> looks like a forgotten thread... It seems reviewer bandwidth is limited and won't get much better during the last couple weeks of the month when many if not most Red Hat employees are out to due company shutdown period.
You need to add a few words to the commit message to describe what's being changed. Oh and you'll also need a "last" patch to docs/news.xml to describe the new feature.
diff --git a/docs/formatdomain.html.in b/docs/formatdomain.html.in index 269741a..1d186ab 100644 --- a/docs/formatdomain.html.in +++ b/docs/formatdomain.html.in @@ -3556,6 +3556,14 @@ virt queues for virtio-blk. (<span class="since">Since 3.9.0</span>) </li> <li> + The optional <code>metadata_cache_size</code> attribute specifies + metadata cache size policy, possible values are "default" and "maximum".
s/policy, possible/policy. Possible/
+ "default" leaves setting cache size to hypervisor, "maximum" makes
s/"default"/Using "default"/
s/to hypervisor,/to the hypervisor./
s/"maximum"/Using "maximum"
+ cache size large enough to keep all metadata, this will help if workload
"Using maximum assigns the largest value possible for the cache size. This ensures the entire disk cache remains in memory for faster I/O at the expense of utilizing more memory."
[Editorial comment: it's really not 100% clear what all the tradeoffs someone is making here. The phrase "large enough" just sticks out, but since you use INT64_MAX in patch 3, I suppose that *is* the maximum. Still in some way indicating that this allows QEMU to grow the cache and keep everything in memory, but has the side effect that disks configured this way will cause guest memory requirements to grow albeit limited by the QEMU algorithms. It seems from my read the max is 32MB, so perhaps not a huge deal, but could be useful to note. Whether QEMU shrinks the cache when not in use wasn't 100% clear to me.]
It's too bad it's not possible to utilize some dynamic value via qcow2_update_options_prepare. If dynamic adjustment were possible, then saving the value in the XML wouldn't be necessary - we could just allow dynamic adjustment similar to what I did for a couple of IOThread params (see commit 11ceedcda and the patches before it).
+ needs access to whole disk all the time. (<span class="since">Since + 4.10.0, QEMU 3.1</span>)
This will be at least 5.0.0 now.
+ </li> + <li> For virtio disks, <a href="#elementsVirtio">Virtio-specific options</a> can also be set. (<span class="since">Since 3.5.0</span>) diff --git a/docs/schemas/domaincommon.rng b/docs/schemas/domaincommon.rng index b9ac5df..3e406fc 100644 --- a/docs/schemas/domaincommon.rng +++ b/docs/schemas/domaincommon.rng @@ -1990,6 +1990,9 @@ <ref name="detect_zeroes"/> </optional> <optional> + <ref name="metadata_cache_size"/> + </optional> + <optional> <attribute name='queues'> <ref name="positiveInteger"/> </attribute> @@ -2090,6 +2093,14 @@ </choice> </attribute> </define> + <define name="metadata_cache_size"> + <attribute name='metadata_cache_size'> + <choice> + <value>default</value> + <value>maximum</value>
I didn't go back and read the previous reviews nor was I present for the KVM Forum discussion on this, but default really means "minimum" I think. Even that just doesn't feel "right". There are 3 QEMU knobs, but this changes 1 knob, allowing one direction. And yes, changing 3 knobs is also confusing. I "assume" that it's been determined that this one knob has the greatest effect on I/O performance...
Reading https://github.com/qemu/qemu/blob/master/docs/qcow2-cache.txt some phrases stick out:
1. "setting the right cache sizes is not a straightforward operation". 2. "In order to choose the cache sizes we need to know how they relate to the amount of allocated space." 3. The limit of the l2 cache size is "32 MB by default on Linux platforms (enough for full coverage of 256 GB images, with the default cluster size)". 4. "QEMU will not use more memory than needed to hold all of the image's L2 tables, regardless of this max. value."
So at least providing INT_MAX won't hurt, although given the math in qcow2_update_options_prepare:
l2_cache_size /= l2_cache_entry_size; if (l2_cache_size < MIN_L2_CACHE_SIZE) { l2_cache_size = MIN_L2_CACHE_SIZE; } if (l2_cache_size > INT_MAX) {
wouldn't that mean we could pass up to "l2_cache_size * l2_cache_entry_size"?
You mean up to INT_MAX / l2_cache_entry_size? No, because there is also a limitation that comes from read_cache_sizes, so the cache will be limited to the maximum the disk size actually needs.
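To make the relationship concrete: per qcow2-cache.txt each 8-byte L2 entry maps one cluster, so coverage = l2_cache_size / 8 * cluster_size. A quick standalone check (illustrative only, not libvirt code):

#include <stdio.h>

int main(void)
{
    unsigned long long cluster_size = 64 * 1024;         /* qcow2 default cluster size */
    unsigned long long l2_cache = 32ULL * 1024 * 1024;   /* 32 MB, the default max on Linux */
    /* each 8-byte L2 entry maps one cluster of the image */
    unsigned long long coverage = l2_cache / 8 * cluster_size;

    printf("%llu GiB\n", coverage / (1024 * 1024 * 1024));  /* prints 256 GiB */
    return 0;
}

which matches the "full coverage of 256 GB images, with the default cluster size" quote above.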
So many dependencies... So much math to look at.
Still using minimum/maximum for what amounts to a boolean operation is I think unnecessary. Rather than a list of values, wouldn't having something like "max_metadata_cache='yes'" be just as effective?
Well, introducing a metadata_cache_size enum is what was agreed on at KVM Forum...
But yet providing maximum runs into immediate issues because of bugs that are fixed in 3.1.
Devil's advocate says - why not patch QEMU to add something that causes the allocation algorithm to use the maximum values for great I/O performance - that way mgmt apps don't have to figure out the math, and if the QEMU algorithm for this ever changes, guest XML settings won't have a negative impact. The mgmt app just has to signify its desire to use algorithms that "can" improve I/O performance significantly.
But this is exactly the case with the approach this patch presents. Only the 'maximum' cache setting is possible, which corresponds to maximum IO. None of the complexities of possible cache size values are exposed. Nikolay
Alternatively, reading through the above document and thinking about how "useful" it could be to use a more specific value, why not go with "metadata_cache_size=N", where N is in MB and describes the size of the cache to be used. Being in MB ascribes to the need to be a power of 2 and quite frankly keeps a reasonable range assigned to the value. I'd find it hard to fathom a "large enough" disk would improve that much I/O performance in KB adjustments, but I haven't done any analysis of that theory either.
There is a lot of discussion of this cache parameter on this list and in bugzilla (sorry for not providing links) that boils down to: one does not know how to set it properly, so an option specified in MB is hard to use. On
I know there's lots of discussion on this - you did list a few series in your first cover. Using MB was merely a reaction to reading the document and seeing it says "The maximum L2 cache size is 32 MB by default on Linux platforms". So a value between 1 to 32 could be used if someone wants to go through the pain of figuring out the nuances of the algorithm and difference in performance in their environment for each step. Not providing a "set" number though satisfies the desire to avoid some number being baked into the guest XML permanently.
the other hand there are cases (synthetic tests, large databases) where the default cache size is clearly not big enough, so in the course of discussion it was decided to offer, besides the default cache size, a cache size that covers the whole disk, so IO performance will not degrade whatever the guest does.
I understand the need... Many previous series have shown that!
Thus this 'maximum' setting. 'default' is just an explicit default (the implicit default being an omitted metadata_cache_size attribute, of course).
The chosen implementation is boolean - all or default. Was there discussion that also noted that perhaps we should add a 3rd or 4th enum or string to describe some condition between default and maximum ("quarter", "half", "threefourths")?
In the long run it doesn't matter, but as the implementation is posted it is a boolean operation. I can live with "minimum" and "maximum", but feel it's important to note the obvious especially since trying to describe anything between could take the PHD (piled higher and deeper) of doc writing abilities.
Also, having an enum instead of a bool is a bit more future-proof )
In my mind only if the choice is at some time in the future to add something between default and maximum. I honestly don't see that happening, but if the group decision at KVM Forum was to go with some sort of enum - I can live with that. Perhaps something was discussed there that just isn't obvious with my short term tunnel vision.
John
Nikolay
I think the above document also gives a "fair" example that could be "reworded" into the libvirt docs in order to help "size" things based on the default QEMU values we're not allowing to be changed and then showing the math what it means to have a 1MB cache, a 2MB cache, an 8MB, etc. vis-a-vis the size of the disk for which the cache is being created. For example, for a disk of up to 8G, the "default" cache would be XXX and setting a value of YYY would help I/O. Similarly for a 16GB, 64GB, etc. Essentially a bit of a guide - although that's starting to border on what should go into the wiki instead.
+ </choice> + </attribute> + </define> <define name="controller"> <element name="controller"> <optional> diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c index 237540b..b2185f8 100644 --- a/src/conf/domain_conf.c +++ b/src/conf/domain_conf.c @@ -885,6 +885,11 @@ VIR_ENUM_IMPL(virDomainDiskDetectZeroes, VIR_DOMAIN_DISK_DETECT_ZEROES_LAST, "on", "unmap")
+VIR_ENUM_IMPL(virDomainDiskMetadataCacheSize, + VIR_DOMAIN_DISK_METADATA_CACHE_SIZE_LAST, + "default", + "maximum") + VIR_ENUM_IMPL(virDomainDiskMirrorState, VIR_DOMAIN_DISK_MIRROR_STATE_LAST, "none", "yes", @@ -9375,6 +9380,14 @@ virDomainDiskDefDriverParseXML(virDomainDiskDefPtr def, } VIR_FREE(tmp);
+ if ((tmp = virXMLPropString(cur, "metadata_cache_size")) && + (def->metadata_cache_size = virDomainDiskMetadataCacheSizeTypeFromString(tmp)) < 0) { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, + _("unknown driver metadata_cache_size value '%s'"), tmp); + goto cleanup; + } + VIR_FREE(tmp); +
This would just use the YesNo type logic instead or read in a numeric value. BTW: Not sure I'd bother with worrying about checking the QEMU maximum as that could change and then we're stuck with a lower maximum check. Just pass along the value to QEMU and let it fail.
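Something along these lines, perhaps (just a sketch of the yes/no variant, using the existing virTristateBool helpers; the max_metadata_cache attribute and field are hypothetical, not what the posted patch does):

    if ((tmp = virXMLPropString(cur, "max_metadata_cache"))) {
        int val;

        /* hypothetical boolean attribute instead of the enum */
        if ((val = virTristateBoolTypeFromString(tmp)) < 0) {
            virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
                           _("unknown driver max_metadata_cache value '%s'"), tmp);
            goto cleanup;
        }
        def->max_metadata_cache = (val == VIR_TRISTATE_BOOL_YES);
        VIR_FREE(tmp);
    }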
ret = 0;
cleanup: @@ -23902,6 +23915,10 @@ virDomainDiskDefFormatDriver(virBufferPtr buf, if (disk->queues) virBufferAsprintf(&driverBuf, " queues='%u'", disk->queues);
+ if (disk->metadata_cache_size) + virBufferAsprintf(&driverBuf, " metadata_cache_size='%s'", + virDomainDiskMetadataCacheSizeTypeToString(disk->metadata_cache_size)); +
My personal crusade... metadata_cache_size > VIR_DOMAIN_DISK_METADATA_CACHE_SIZE_DEFAULT (yes, > 0).
Of course this all depends on your final solution - a boolean would only be written if set and an int value only written if > 0 (since 0 would be the "optional" viewpoint).
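i.e. make the comparison explicit, something like (sketch only):

    if (disk->metadata_cache_size > VIR_DOMAIN_DISK_METADATA_CACHE_SIZE_DEFAULT)
        virBufferAsprintf(&driverBuf, " metadata_cache_size='%s'",
                          virDomainDiskMetadataCacheSizeTypeToString(disk->metadata_cache_size));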
virDomainVirtioOptionsFormat(&driverBuf, disk->virtio);
ret = virXMLFormatElement(buf, "driver", &driverBuf, NULL); diff --git a/src/conf/domain_conf.h b/src/conf/domain_conf.h index e30a4b2..b155058 100644 --- a/src/conf/domain_conf.h +++ b/src/conf/domain_conf.h @@ -568,6 +568,13 @@ typedef enum { VIR_DOMAIN_DISK_DETECT_ZEROES_LAST } virDomainDiskDetectZeroes;
+typedef enum { + VIR_DOMAIN_DISK_METADATA_CACHE_SIZE_DEFAULT = 0, + VIR_DOMAIN_DISK_METADATA_CACHE_SIZE_MAXIMUM, + + VIR_DOMAIN_DISK_METADATA_CACHE_SIZE_LAST +} virDomainDiskMetadataCacheSize; + typedef struct _virDomainBlockIoTuneInfo virDomainBlockIoTuneInfo; struct _virDomainBlockIoTuneInfo { unsigned long long total_bytes_sec; @@ -672,6 +679,7 @@ struct _virDomainDiskDef { int discard; /* enum virDomainDiskDiscard */ unsigned int iothread; /* unused = 0, > 0 specific thread # */ int detect_zeroes; /* enum virDomainDiskDetectZeroes */ + int metadata_cache_size; /* enum virDomainDiskMetadataCacheSize */ char *domain_name; /* backend domain name */ unsigned int queues; virDomainVirtioOptionsPtr virtio; @@ -3388,6 +3396,7 @@ VIR_ENUM_DECL(virDomainDeviceSGIO) VIR_ENUM_DECL(virDomainDiskTray) VIR_ENUM_DECL(virDomainDiskDiscard) VIR_ENUM_DECL(virDomainDiskDetectZeroes) +VIR_ENUM_DECL(virDomainDiskMetadataCacheSize) VIR_ENUM_DECL(virDomainDiskMirrorState) VIR_ENUM_DECL(virDomainController) VIR_ENUM_DECL(virDomainControllerModelPCI) diff --git a/tests/qemuxml2argvdata/disk-metadata_cache_size.xml b/tests/qemuxml2argvdata/disk-metadata_cache_size.xml new file mode 100644 index 0000000..8ac2599 --- /dev/null +++ b/tests/qemuxml2argvdata/disk-metadata_cache_size.xml @@ -0,0 +1,42 @@ +<domain type='qemu'> + <name>test</name> + <uuid>468404ad-d49c-40f2-9e14-02294f9c1be3</uuid> + <memory unit='KiB'>1048576</memory> + <currentMemory unit='KiB'>1048576</currentMemory> + <vcpu placement='static'>1</vcpu> + <os> + <type arch='x86_64' machine='pc-0.13'>hvm</type>
Nothing more recent than pc-0.13 ;-) for this copy-pasta.
+ <boot dev='cdrom'/> + <boot dev='hd'/> + <bootmenu enable='yes'/> + </os> + <clock offset='utc'/> + <on_poweroff>destroy</on_poweroff> + <on_reboot>restart</on_reboot> + <on_crash>restart</on_crash> + <devices> + <emulator>/usr/bin/qemu-system-x86_64</emulator> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' metadata_cache_size='maximum'/> + <source file='/var/lib/libvirt/images/f14.img'/> + <target dev='vda' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x00' slot='0x04' function='0x0'/> + </disk> + <disk type='file' device='cdrom'> + <driver name='qemu' type='raw'/> + <source file='/var/lib/libvirt/Fedora-14-x86_64-Live-KDE.iso'/> + <target dev='hdc' bus='ide'/> + <readonly/> + <address type='drive' controller='0' bus='1' target='0' unit='0'/> + </disk>
This second disk isn't necessary
John
+ <controller type='usb' index='0'/> + <controller type='virtio-serial' index='0'> + <address type='pci' domain='0x0000' bus='0x00' slot='0x06' function='0x0'/> + </controller> + <controller type='ide' index='0'/> + <controller type='pci' index='0' model='pci-root'/> + <input type='mouse' bus='ps2'/> + <input type='keyboard' bus='ps2'/> + <memballoon model='virtio'/> + </devices> +</domain> diff --git a/tests/qemuxml2xmloutdata/disk-metadata_cache_size.xml b/tests/qemuxml2xmloutdata/disk-metadata_cache_size.xml new file mode 100644 index 0000000..5fed22b --- /dev/null +++ b/tests/qemuxml2xmloutdata/disk-metadata_cache_size.xml @@ -0,0 +1,48 @@ +<domain type='qemu'> + <name>test</name> + <uuid>468404ad-d49c-40f2-9e14-02294f9c1be3</uuid> + <memory unit='KiB'>1048576</memory> + <currentMemory unit='KiB'>1048576</currentMemory> + <vcpu placement='static'>1</vcpu> + <os> + <type arch='x86_64' machine='pc-0.13'>hvm</type> + <boot dev='cdrom'/> + <boot dev='hd'/> + <bootmenu enable='yes'/> + </os> + <clock offset='utc'/> + <on_poweroff>destroy</on_poweroff> + <on_reboot>restart</on_reboot> + <on_crash>restart</on_crash> + <devices> + <emulator>/usr/bin/qemu-system-x86_64</emulator> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' metadata_cache_size='maximum'/> + <source file='/var/lib/libvirt/images/f14.img'/> + <target dev='vda' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x00' slot='0x04' function='0x0'/> + </disk> + <disk type='file' device='cdrom'> + <driver name='qemu' type='raw'/> + <source file='/var/lib/libvirt/Fedora-14-x86_64-Live-KDE.iso'/> + <target dev='hdc' bus='ide'/> + <readonly/> + <address type='drive' controller='0' bus='1' target='0' unit='0'/> + </disk> + <controller type='usb' index='0'> + <address type='pci' domain='0x0000' bus='0x00' slot='0x01' function='0x2'/> + </controller> + <controller type='virtio-serial' index='0'> + <address type='pci' domain='0x0000' bus='0x00' slot='0x06' function='0x0'/> + </controller> + <controller type='ide' index='0'> + <address type='pci' domain='0x0000' bus='0x00' slot='0x01' function='0x1'/> + </controller> + <controller type='pci' index='0' model='pci-root'/> + <input type='mouse' bus='ps2'/> + <input type='keyboard' bus='ps2'/> + <memballoon model='virtio'> + <address type='pci' domain='0x0000' bus='0x00' slot='0x03' function='0x0'/> + </memballoon> + </devices> +</domain> diff --git a/tests/qemuxml2xmltest.c b/tests/qemuxml2xmltest.c index 89640f6..c44e0fe 100644 --- a/tests/qemuxml2xmltest.c +++ b/tests/qemuxml2xmltest.c @@ -1235,6 +1235,8 @@ mymain(void) DO_TEST("riscv64-virt", QEMU_CAPS_DEVICE_VIRTIO_MMIO);
+ DO_TEST("disk-metadata_cache_size", NONE); + if (getenv("LIBVIRT_SKIP_CLEANUP") == NULL) virFileDeleteTree(fakerootdir);

On 12/12/18 3:39 AM, Nikolay Shirokovskiy wrote:
On 11.12.2018 19:34, John Ferlan wrote:
On 12/11/18 5:22 AM, Nikolay Shirokovskiy wrote:
On 10.12.2018 19:56, John Ferlan wrote:
On 11/8/18 8:02 AM, Nikolay Shirokovskiy wrote:
Signed-off-by: Nikolay Shirokovskiy <nshirokovskiy@virtuozzo.com> --- docs/formatdomain.html.in | 8 ++++ docs/schemas/domaincommon.rng | 11 +++++ src/conf/domain_conf.c | 17 ++++++++ src/conf/domain_conf.h | 9 ++++ .../qemuxml2argvdata/disk-metadata_cache_size.xml | 42 +++++++++++++++++++ .../disk-metadata_cache_size.xml | 48 ++++++++++++++++++++++ tests/qemuxml2xmltest.c | 2 + 7 files changed, 137 insertions(+) create mode 100644 tests/qemuxml2argvdata/disk-metadata_cache_size.xml create mode 100644 tests/qemuxml2xmloutdata/disk-metadata_cache_size.xml
<sigh> looks like a forgotten thread... It seems reviewer bandwidth is limited and won't get much better during the last couple weeks of the month when many if not most Red Hat employees are out due to the company shutdown period.
You need to add a few words to the commit message to describe what's being changed. Oh and you'll also need a "last" patch to docs/news.xml to describe the new feature.
diff --git a/docs/formatdomain.html.in b/docs/formatdomain.html.in index 269741a..1d186ab 100644 --- a/docs/formatdomain.html.in +++ b/docs/formatdomain.html.in @@ -3556,6 +3556,14 @@ virt queues for virtio-blk. (<span class="since">Since 3.9.0</span>) </li> <li> + The optional <code>metadata_cache_size</code> attribute specifies + metadata cache size policy, possible values are "default" and "maximum".
s/policy, possible/policy. Possible/
+ "default" leaves setting cache size to hypervisor, "maximum" makes
s/"default"/Using "default"/
s/to hypervisor,/to the hypervisor./
s/"maximum"/Using "maximum"
+ cache size large enough to keep all metadata, this will help if workload
"Using maximum assigns the largest value possible for the cache size. This ensures the entire disk cache remains in memory for faster I/O at the expense of utilizing more memory."
[Editorial comment: it's really not 100% clear what all the tradeoffs someone is making here. The phrase "large enough" just sticks out, but since you use INT64_MAX in patch 3, I suppose that *is* the maximum. Still in some way indicating that this allows QEMU to grow the cache and keep everything in memory, but has the side effect that disks configured this way will cause guest memory requirements to grow albeit limited by the QEMU algorithms. It seems from my read the max is 32MB, so perhaps not a huge deal, but could be useful to note. Whether QEMU shrinks the cache when not in use wasn't 100% clear to me.]
It's too bad it's not possible to utilize some dynamic value via qcow2_update_options_prepare. If dynamic adjustment were possible, then saving the value in the XML wouldn't be necessary - we could just allow dynamic adjustment similar to what I did for a couple of IOThread params (see commit 11ceedcda and the patches before it).
+ needs access to whole disk all the time. (<span class="since">Since + 4.10.0, QEMU 3.1</span>)
This will be at least 5.0.0 now.
+ </li> + <li> For virtio disks, <a href="#elementsVirtio">Virtio-specific options</a> can also be set. (<span class="since">Since 3.5.0</span>) diff --git a/docs/schemas/domaincommon.rng b/docs/schemas/domaincommon.rng index b9ac5df..3e406fc 100644 --- a/docs/schemas/domaincommon.rng +++ b/docs/schemas/domaincommon.rng @@ -1990,6 +1990,9 @@ <ref name="detect_zeroes"/> </optional> <optional> + <ref name="metadata_cache_size"/> + </optional> + <optional> <attribute name='queues'> <ref name="positiveInteger"/> </attribute> @@ -2090,6 +2093,14 @@ </choice> </attribute> </define> + <define name="metadata_cache_size"> + <attribute name='metadata_cache_size'> + <choice> + <value>default</value> + <value>maximum</value>
I didn't go back and read the previous reviews nor was I present for the KVM Forum discussion on this, but default really means "minimum" I think. Even that just doesn't feel "right". There are 3 QEMU knobs, but this changes 1 knob, allowing one direction. And yes, changing 3 knobs is also confusing. I "assume" that it's been determined that this one knob has the greatest effect on I/O performance...
Reading https://github.com/qemu/qemu/blob/master/docs/qcow2-cache.txt some phrases stick out:
1. "setting the right cache sizes is not a straightforward operation". 2. "In order to choose the cache sizes we need to know how they relate to the amount of allocated space." 3. The limit of the l2 cache size is "32 MB by default on Linux platforms (enough for full coverage of 256 GB images, with the default cluster size)". 4. "QEMU will not use more memory than needed to hold all of the image's L2 tables, regardless of this max. value."
So at least providing INT_MAX won't hurt, although given the math in qcow2_update_options_prepare:
l2_cache_size /= l2_cache_entry_size; if (l2_cache_size < MIN_L2_CACHE_SIZE) { l2_cache_size = MIN_L2_CACHE_SIZE; } if (l2_cache_size > INT_MAX) {
wouldn't that mean we could pass up to "l2_cache_size * l2_cache_entry_size"?
You mean up to INT_MAX / l2_cache_entry_size? No, because there is limitation also that comes from read_cache_sizes so cache will be limited to maximum needs of disk size.
So many dependencies... So much math to look at.
Still using minimum/maximum for what amounts to a boolean operation is I think unnecessary. Rather than a list of values, wouldn't having something like "max_metadata_cache='yes'" be just as effective?
Well introducing metadata_cache_size enum is what it was agreed on KVM Forum...
But yet providing maximum runs into immediate issues because of bugs that are fixed in 3.1.
Devil's advocate says - why not patch QEMU to add something that causes the allocation algorithm to use the maximum values for great I/O performance - that way mgmt apps don't have to figure out the math, and if the QEMU algorithm for this ever changes, guest XML settings won't have a negative impact. The mgmt app just has to signify its desire to use algorithms that "can" improve I/O performance significantly.
But this is exactly the case with the approach this patch presents. Only the 'maximum' cache setting is possible, which corresponds to maximum IO. None of the complexities of possible cache size values are exposed.
Understood. My QEMU point still stands... Since someone else is the expert here, providing a couple of flags to alter the "default" algorithm to better values based on knowledge and experience is, I think, better than the mgmt tool having to provide values for any of these knobs. I also understand the QEMU viewpoint - hey, here's a bunch of knobs for you to play with - we don't want to make the decision about which is the best option for your environment. Reminds me of old stereo equipment with bass, treble, and balance knobs eventually being replaced by electronic technology that lets you say "I want those knobs automatically adjusted for news, music, or movies". Since we're not going to get that any time soon, then OK, have the mgmt app be forced to take the giant hammer approach. John
Nikolay
Alternatively, reading through the above document and thinking about how "useful" it could be to use a more specific value, why not go with "metadata_cache_size=N", where N is in MB and describes the size of the cache to be used. Being in MB ascribes to the need to be a power of 2 and quite frankly keeps a reasonable range assigned to the value. I'd find it hard to fathom a "large enough" disk would improve that much I/O performance in KB adjustments, but I haven't done any analysis of that theory either.
There a lot of discussion of this cache parameter in this list, bugzilla (sorry for not providing links) that boils down to one does not know how to set it properly, so having option that is in MB is hard to use. On
I know there's lots of discussion on this - you did list a few series in your first cover. Using MB was merely a reaction to reading the document and seeing it says "The maximum L2 cache size is 32 MB by default on Linux platforms". So a value between 1 to 32 could be used if someone wants to go through the pain of figuring out the nuances of the algorithm and difference in performance in their environment for each step. Not providing a "set" number though satisfies the desire to avoid some number being baked into the guest XML permanently.
the other hand there are cases (synthetic tests, large database) when default cache size is clearly not big enough so in course of discussion it was decided to have besides default cache size also cache size that cover whole disk so IO performance will not degrade whatever guest will do.
I understand the need... Many previous series have shown that!
Thus this 'maximum' set. 'default' is just an explicit default (implicit is omitted metadata_cache_size option of course).
The chosen implementation is boolean - all or default. Was there discussion that also noted that perhaps we should add a 3rd or 4th enum or string to describe some condition between default and maximum ("quarter", "half", "threefourths")?
In the long run it doesn't matter, but as the implementation is posted it is a boolean operation. I can live with "minimum" and "maximum", but feel it's important to note the obvious especially since trying to describe anything between could take the PHD (piled higher and deeper) of doc writing abilities.
Also having enum instead of bool is a bit future proof )
In my mind only if the choice is at some time in the future to add something between default and maximum. I honestly don't see that happening, but if the group decision at KVM Forum was to go with some sort of enum - I can live with that. Perhaps something was discussed there that just isn't obvious with my short term tunnel vision.
John
Nikolay
I think the above document also gives a "fair" example that could be "reworded" into the libvirt docs in order to help "size" things based on the default QEMU values we're not allowing to be changed and then showing the math what it means to have a 1MB cache, a 2MB cache, an 8MB, etc. vis-a-vis the size of the disk for which the cache is being created. For example, for a disk of up to 8G, the "default" cache would be XXX and setting a value of YYY would help I/O. Similarly for a 16GB, 64GB, etc. Essentially a bit of a guide - although that's starting to border on what should go into the wiki instead.
+ </choice> + </attribute> + </define> <define name="controller"> <element name="controller"> <optional> diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c index 237540b..b2185f8 100644 --- a/src/conf/domain_conf.c +++ b/src/conf/domain_conf.c @@ -885,6 +885,11 @@ VIR_ENUM_IMPL(virDomainDiskDetectZeroes, VIR_DOMAIN_DISK_DETECT_ZEROES_LAST, "on", "unmap")
+VIR_ENUM_IMPL(virDomainDiskMetadataCacheSize, + VIR_DOMAIN_DISK_METADATA_CACHE_SIZE_LAST, + "default", + "maximum") + VIR_ENUM_IMPL(virDomainDiskMirrorState, VIR_DOMAIN_DISK_MIRROR_STATE_LAST, "none", "yes", @@ -9375,6 +9380,14 @@ virDomainDiskDefDriverParseXML(virDomainDiskDefPtr def, } VIR_FREE(tmp);
+ if ((tmp = virXMLPropString(cur, "metadata_cache_size")) && + (def->metadata_cache_size = virDomainDiskMetadataCacheSizeTypeFromString(tmp)) < 0) { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, + _("unknown driver metadata_cache_size value '%s'"), tmp); + goto cleanup; + } + VIR_FREE(tmp); +
This would just use the YesNo type logic instead or read in a numeric value. BTW: Not sure I'd bother with worrying about checking the QEMU maximum as that could change and then we're stuck with a lower maximum check. Just pass along the value to QEMU and let it fail.
ret = 0;
cleanup: @@ -23902,6 +23915,10 @@ virDomainDiskDefFormatDriver(virBufferPtr buf, if (disk->queues) virBufferAsprintf(&driverBuf, " queues='%u'", disk->queues);
+ if (disk->metadata_cache_size) + virBufferAsprintf(&driverBuf, " metadata_cache_size='%s'", + virDomainDiskMetadataCacheSizeTypeToString(disk->metadata_cache_size)); +
My personal crusade... metadata_cache_size > VIR_DOMAIN_DISK_METADATA_CACHE_SIZE_DEFAULT (yes, > 0).
Of course this all depends on your final solution - a boolean would only be written if set and an int value only written if > 0 (since 0 would be the "optional" viewpoint).
virDomainVirtioOptionsFormat(&driverBuf, disk->virtio);
ret = virXMLFormatElement(buf, "driver", &driverBuf, NULL); diff --git a/src/conf/domain_conf.h b/src/conf/domain_conf.h index e30a4b2..b155058 100644 --- a/src/conf/domain_conf.h +++ b/src/conf/domain_conf.h @@ -568,6 +568,13 @@ typedef enum { VIR_DOMAIN_DISK_DETECT_ZEROES_LAST } virDomainDiskDetectZeroes;
+typedef enum { + VIR_DOMAIN_DISK_METADATA_CACHE_SIZE_DEFAULT = 0, + VIR_DOMAIN_DISK_METADATA_CACHE_SIZE_MAXIMUM, + + VIR_DOMAIN_DISK_METADATA_CACHE_SIZE_LAST +} virDomainDiskMetadataCacheSize; + typedef struct _virDomainBlockIoTuneInfo virDomainBlockIoTuneInfo; struct _virDomainBlockIoTuneInfo { unsigned long long total_bytes_sec; @@ -672,6 +679,7 @@ struct _virDomainDiskDef { int discard; /* enum virDomainDiskDiscard */ unsigned int iothread; /* unused = 0, > 0 specific thread # */ int detect_zeroes; /* enum virDomainDiskDetectZeroes */ + int metadata_cache_size; /* enum virDomainDiskMetadataCacheSize */ char *domain_name; /* backend domain name */ unsigned int queues; virDomainVirtioOptionsPtr virtio; @@ -3388,6 +3396,7 @@ VIR_ENUM_DECL(virDomainDeviceSGIO) VIR_ENUM_DECL(virDomainDiskTray) VIR_ENUM_DECL(virDomainDiskDiscard) VIR_ENUM_DECL(virDomainDiskDetectZeroes) +VIR_ENUM_DECL(virDomainDiskMetadataCacheSize) VIR_ENUM_DECL(virDomainDiskMirrorState) VIR_ENUM_DECL(virDomainController) VIR_ENUM_DECL(virDomainControllerModelPCI) diff --git a/tests/qemuxml2argvdata/disk-metadata_cache_size.xml b/tests/qemuxml2argvdata/disk-metadata_cache_size.xml new file mode 100644 index 0000000..8ac2599 --- /dev/null +++ b/tests/qemuxml2argvdata/disk-metadata_cache_size.xml @@ -0,0 +1,42 @@ +<domain type='qemu'> + <name>test</name> + <uuid>468404ad-d49c-40f2-9e14-02294f9c1be3</uuid> + <memory unit='KiB'>1048576</memory> + <currentMemory unit='KiB'>1048576</currentMemory> + <vcpu placement='static'>1</vcpu> + <os> + <type arch='x86_64' machine='pc-0.13'>hvm</type>
Nothing more recent than pc-0.13 ;-) for this copy-pasta.
+ <boot dev='cdrom'/> + <boot dev='hd'/> + <bootmenu enable='yes'/> + </os> + <clock offset='utc'/> + <on_poweroff>destroy</on_poweroff> + <on_reboot>restart</on_reboot> + <on_crash>restart</on_crash> + <devices> + <emulator>/usr/bin/qemu-system-x86_64</emulator> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' metadata_cache_size='maximum'/> + <source file='/var/lib/libvirt/images/f14.img'/> + <target dev='vda' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x00' slot='0x04' function='0x0'/> + </disk> + <disk type='file' device='cdrom'> + <driver name='qemu' type='raw'/> + <source file='/var/lib/libvirt/Fedora-14-x86_64-Live-KDE.iso'/> + <target dev='hdc' bus='ide'/> + <readonly/> + <address type='drive' controller='0' bus='1' target='0' unit='0'/> + </disk>
This second disk isn't necessary
John
+ <controller type='usb' index='0'/> + <controller type='virtio-serial' index='0'> + <address type='pci' domain='0x0000' bus='0x00' slot='0x06' function='0x0'/> + </controller> + <controller type='ide' index='0'/> + <controller type='pci' index='0' model='pci-root'/> + <input type='mouse' bus='ps2'/> + <input type='keyboard' bus='ps2'/> + <memballoon model='virtio'/> + </devices> +</domain> diff --git a/tests/qemuxml2xmloutdata/disk-metadata_cache_size.xml b/tests/qemuxml2xmloutdata/disk-metadata_cache_size.xml new file mode 100644 index 0000000..5fed22b --- /dev/null +++ b/tests/qemuxml2xmloutdata/disk-metadata_cache_size.xml @@ -0,0 +1,48 @@ +<domain type='qemu'> + <name>test</name> + <uuid>468404ad-d49c-40f2-9e14-02294f9c1be3</uuid> + <memory unit='KiB'>1048576</memory> + <currentMemory unit='KiB'>1048576</currentMemory> + <vcpu placement='static'>1</vcpu> + <os> + <type arch='x86_64' machine='pc-0.13'>hvm</type> + <boot dev='cdrom'/> + <boot dev='hd'/> + <bootmenu enable='yes'/> + </os> + <clock offset='utc'/> + <on_poweroff>destroy</on_poweroff> + <on_reboot>restart</on_reboot> + <on_crash>restart</on_crash> + <devices> + <emulator>/usr/bin/qemu-system-x86_64</emulator> + <disk type='file' device='disk'> + <driver name='qemu' type='qcow2' metadata_cache_size='maximum'/> + <source file='/var/lib/libvirt/images/f14.img'/> + <target dev='vda' bus='virtio'/> + <address type='pci' domain='0x0000' bus='0x00' slot='0x04' function='0x0'/> + </disk> + <disk type='file' device='cdrom'> + <driver name='qemu' type='raw'/> + <source file='/var/lib/libvirt/Fedora-14-x86_64-Live-KDE.iso'/> + <target dev='hdc' bus='ide'/> + <readonly/> + <address type='drive' controller='0' bus='1' target='0' unit='0'/> + </disk> + <controller type='usb' index='0'> + <address type='pci' domain='0x0000' bus='0x00' slot='0x01' function='0x2'/> + </controller> + <controller type='virtio-serial' index='0'> + <address type='pci' domain='0x0000' bus='0x00' slot='0x06' function='0x0'/> + </controller> + <controller type='ide' index='0'> + <address type='pci' domain='0x0000' bus='0x00' slot='0x01' function='0x1'/> + </controller> + <controller type='pci' index='0' model='pci-root'/> + <input type='mouse' bus='ps2'/> + <input type='keyboard' bus='ps2'/> + <memballoon model='virtio'> + <address type='pci' domain='0x0000' bus='0x00' slot='0x03' function='0x0'/> + </memballoon> + </devices> +</domain> diff --git a/tests/qemuxml2xmltest.c b/tests/qemuxml2xmltest.c index 89640f6..c44e0fe 100644 --- a/tests/qemuxml2xmltest.c +++ b/tests/qemuxml2xmltest.c @@ -1235,6 +1235,8 @@ mymain(void) DO_TEST("riscv64-virt", QEMU_CAPS_DEVICE_VIRTIO_MMIO);
+ DO_TEST("disk-metadata_cache_size", NONE); + if (getenv("LIBVIRT_SKIP_CLEANUP") == NULL) virFileDeleteTree(fakerootdir);

For qemu capable of setting l2-cache-size for qcow2 images to INT64_MAX and semantics of upper limit on l2 cache size. We can only check this by qemu version (3.1.0) now. Signed-off-by: Nikolay Shirokovskiy <nshirokovskiy@virtuozzo.com> --- src/qemu/qemu_capabilities.c | 5 +++++ src/qemu/qemu_capabilities.h | 1 + 2 files changed, 6 insertions(+) diff --git a/src/qemu/qemu_capabilities.c b/src/qemu/qemu_capabilities.c index 2ca5af3..49a3b60 100644 --- a/src/qemu/qemu_capabilities.c +++ b/src/qemu/qemu_capabilities.c @@ -509,6 +509,7 @@ VIR_ENUM_IMPL(virQEMUCaps, QEMU_CAPS_LAST, "vfio-pci.display", "blockdev", "vfio-ap", + "qcow2.l2-cache-size", ); @@ -4117,6 +4118,10 @@ virQEMUCapsInitQMPMonitor(virQEMUCapsPtr qemuCaps, virQEMUCapsSet(qemuCaps, QEMU_CAPS_MACHINE_PSERIES_MAX_CPU_COMPAT); } + /* l2-cache-size before 3001000 does not accept INT64_MAX */ + if (qemuCaps->version >= 3001000) + virQEMUCapsSet(qemuCaps, QEMU_CAPS_QCOW2_L2_CACHE_SIZE); + if (virQEMUCapsProbeQMPCommands(qemuCaps, mon) < 0) goto cleanup; diff --git a/src/qemu/qemu_capabilities.h b/src/qemu/qemu_capabilities.h index 6bb9a2c..bb2ac17 100644 --- a/src/qemu/qemu_capabilities.h +++ b/src/qemu/qemu_capabilities.h @@ -493,6 +493,7 @@ typedef enum { /* virQEMUCapsFlags grouping marker for syntax-check */ QEMU_CAPS_VFIO_PCI_DISPLAY, /* -device vfio-pci.display */ QEMU_CAPS_BLOCKDEV, /* -blockdev and blockdev-add are supported */ QEMU_CAPS_DEVICE_VFIO_AP, /* -device vfio-ap */ + QEMU_CAPS_QCOW2_L2_CACHE_SIZE, /* -blockdev supports l2-cache-size with INT64_MAX value */ QEMU_CAPS_LAST /* this must always be the last item */ } virQEMUCapsFlags; -- 1.8.3.1

On 11/8/18 8:02 AM, Nikolay Shirokovskiy wrote:
For qemu capable of setting l2-cache-size for qcow2 images to INT64_MAX and semantics of upper limit on l2 cache size. We can only check this by qemu version (3.1.0) now.
Signed-off-by: Nikolay Shirokovskiy <nshirokovskiy@virtuozzo.com> --- src/qemu/qemu_capabilities.c | 5 +++++ src/qemu/qemu_capabilities.h | 1 + 2 files changed, 6 insertions(+)
This patch needs to be updated to top of tree.
diff --git a/src/qemu/qemu_capabilities.c b/src/qemu/qemu_capabilities.c index 2ca5af3..49a3b60 100644 --- a/src/qemu/qemu_capabilities.c +++ b/src/qemu/qemu_capabilities.c @@ -509,6 +509,7 @@ VIR_ENUM_IMPL(virQEMUCaps, QEMU_CAPS_LAST, "vfio-pci.display", "blockdev", "vfio-ap", + "qcow2.l2-cache-size",
When you do update, you will need to fix the comma-less entry for "egl-headless.rendernode" as well, unless someone else gets to it first.
);
@@ -4117,6 +4118,10 @@ virQEMUCapsInitQMPMonitor(virQEMUCapsPtr qemuCaps, virQEMUCapsSet(qemuCaps, QEMU_CAPS_MACHINE_PSERIES_MAX_CPU_COMPAT); }
+ /* l2-cache-size before 3001000 does not accept INT64_MAX */ + if (qemuCaps->version >= 3001000) + virQEMUCapsSet(qemuCaps, QEMU_CAPS_QCOW2_L2_CACHE_SIZE); +
Not a fan of this. The 'l2-cache-size' was supported since QEMU 2.2. A "newer" algorithm for cache adjustment has been supported since QEMU 2.12 (and there's l2-cache-entry-size that "could be used" to know that). Then there's some unreferenced commit indicating usage of INT64_MAX. Tracking that down, I find qemu.git commit 6c1c8d5d from Max which enforces MAX_INT. Still does that really matter? Let QEMU pick its own max and don't have libvirt be the arbiter of that (like I noted in my 1/4 review). My reasoning is that there have been quite a few "adjustments" to the algorithms along the way. Those adjustments are one of the concerns voiced in the past as to why making any "semi-permanent" change to the value in some libvirt XML format has been NACKed historically. Thus by placing boundaries that are true today we limit ourselves for the future. BTW: If 3.1 was the "base" from which you want to work, then adjustments to the tests/qemucapabilitiesdata/*3_1*{.replies|.xml} files would be required as evidenced by your patch 4. The *.xml file would need to have the correct <version>3001000</version> setting which should allow patch 4 to be merged into patch 3. John
if (virQEMUCapsProbeQMPCommands(qemuCaps, mon) < 0) goto cleanup;
diff --git a/src/qemu/qemu_capabilities.h b/src/qemu/qemu_capabilities.h index 6bb9a2c..bb2ac17 100644 --- a/src/qemu/qemu_capabilities.h +++ b/src/qemu/qemu_capabilities.h @@ -493,6 +493,7 @@ typedef enum { /* virQEMUCapsFlags grouping marker for syntax-check */ QEMU_CAPS_VFIO_PCI_DISPLAY, /* -device vfio-pci.display */ QEMU_CAPS_BLOCKDEV, /* -blockdev and blockdev-add are supported */ QEMU_CAPS_DEVICE_VFIO_AP, /* -device vfio-ap */ + QEMU_CAPS_QCOW2_L2_CACHE_SIZE, /* -blockdev supports l2-cache-size with INT64_MAX value */
QEMU_CAPS_LAST /* this must always be the last item */ } virQEMUCapsFlags;

On 10.12.2018 19:58, John Ferlan wrote:
On 11/8/18 8:02 AM, Nikolay Shirokovskiy wrote:
For qemu capable of setting l2-cache-size for qcow2 images to INT64_MAX and semantics of upper limit on l2 cache size. We can only check this by qemu version (3.1.0) now.
Signed-off-by: Nikolay Shirokovskiy <nshirokovskiy@virtuozzo.com> --- src/qemu/qemu_capabilities.c | 5 +++++ src/qemu/qemu_capabilities.h | 1 + 2 files changed, 6 insertions(+)
This patch needs to be updated to top of tree.
diff --git a/src/qemu/qemu_capabilities.c b/src/qemu/qemu_capabilities.c index 2ca5af3..49a3b60 100644 --- a/src/qemu/qemu_capabilities.c +++ b/src/qemu/qemu_capabilities.c @@ -509,6 +509,7 @@ VIR_ENUM_IMPL(virQEMUCaps, QEMU_CAPS_LAST, "vfio-pci.display", "blockdev", "vfio-ap", + "qcow2.l2-cache-size",
When you do update, you will need to fix the comma-less entry for "egl-headless.rendernode" as well, unless someone else gets to it first.
);
@@ -4117,6 +4118,10 @@ virQEMUCapsInitQMPMonitor(virQEMUCapsPtr qemuCaps, virQEMUCapsSet(qemuCaps, QEMU_CAPS_MACHINE_PSERIES_MAX_CPU_COMPAT); }
+ /* l2-cache-size before 3001000 does not accept INT64_MAX */ + if (qemuCaps->version >= 3001000) + virQEMUCapsSet(qemuCaps, QEMU_CAPS_QCOW2_L2_CACHE_SIZE); +
Not a fan of this.
The 'l2-cache-size' was supported since QEMU 2.2. A "newer" algorithm for cache adjustment has been supported since QEMU 2.12 (and there's l2-cache-entry-size that "could be used" to know that). Then there's some unreferenced commit indicating usage of INT64_MAX. Tracking that down, I find qemu.git commit 6c1c8d5d from Max which enforces MAX_INT.
This is a failure rather than enforcement. And AFAIU the code that limits the cache to an appropriate maximum when INT64_MAX is given in read_cache_sizes: *l2_cache_size = MIN(max_l2_cache, l2_cache_max_setting); only appeared after the release of 3.0 in commit b749562d9822d14ef69c9eaa5f85903010b86c30 Author: Leonid Bloch <lbloch@janustech.com> Date: Wed Sep 26 19:04:43 2018 +0300 qcow2: Assign the L2 cache relatively to the image size i.e. setting the cache size to INT64_MAX before 3.1 will fail. In other words, in earlier versions there was no value to specify that the cache size needs to be set to the maximum. You can calculate this value yourself knowing the disk size and disk cluster size and set it, but this is not convenient.
Still does that really matter? Let QEMU pick its own max and don't have libvirt be the arbiter of that (like I noted in my 1/4 review). My reasoning is that there have been quite a few "adjustments" to the algorithms along the way. Those adjustments are one of the concerns voiced in the past as to why making any "semi-permanent" change to the value in some libvirt XML format has been NACKed historically. Thus by placing boundaries that are true today we limit ourselves for the future.
IIUC setting INT64_MAX is ok in this respect. It is not a real value for the cache but rather an instruction for QEMU to pick one.
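For reference, on the -blockdev side this presumably boils down to one extra member of the qcow2 format node props, something like (a rough sketch of what patch 3 does; the field name here is made up):

    if (src->metadataCacheMaximum &&
        virJSONValueObjectAdd(props, "I:l2-cache-size", INT64_MAX, NULL) < 0)
        return -1;

so the format node gets l2-cache-size=9223372036854775807 and QEMU 3.1+ clamps it down to what the image actually needs.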
BTW: If 3.1 was the "base" from which you want to work, then adjustments to the tests/qemucapabilitiesdata/*3_1*{.replies|.xml } files would be required as evidenced by your patch 4. The *.xml file would need to have the correct <version>3001000</version> setting which should allow patch4 to be merged into patch3.
Yeah, but 3.1 is not yet released, and I also need blockdev, as the patch only makes a difference in that case. Nikolay
John
if (virQEMUCapsProbeQMPCommands(qemuCaps, mon) < 0) goto cleanup;
diff --git a/src/qemu/qemu_capabilities.h b/src/qemu/qemu_capabilities.h index 6bb9a2c..bb2ac17 100644 --- a/src/qemu/qemu_capabilities.h +++ b/src/qemu/qemu_capabilities.h @@ -493,6 +493,7 @@ typedef enum { /* virQEMUCapsFlags grouping marker for syntax-check */ QEMU_CAPS_VFIO_PCI_DISPLAY, /* -device vfio-pci.display */ QEMU_CAPS_BLOCKDEV, /* -blockdev and blockdev-add are supported */ QEMU_CAPS_DEVICE_VFIO_AP, /* -device vfio-ap */ + QEMU_CAPS_QCOW2_L2_CACHE_SIZE, /* -blockdev supports l2-cache-size with INT64_MAX value */
QEMU_CAPS_LAST /* this must always be the last item */ } virQEMUCapsFlags;

On 12/11/18 6:04 AM, Nikolay Shirokovskiy wrote:
On 10.12.2018 19:58, John Ferlan wrote:
On 11/8/18 8:02 AM, Nikolay Shirokovskiy wrote:
For qemu capable of setting l2-cache-size for qcow2 images to INT64_MAX and semantics of upper limit on l2 cache size. We can only check this by qemu version (3.1.0) now.
Signed-off-by: Nikolay Shirokovskiy <nshirokovskiy@virtuozzo.com> --- src/qemu/qemu_capabilities.c | 5 +++++ src/qemu/qemu_capabilities.h | 1 + 2 files changed, 6 insertions(+)
This patch needs to be updated to top of tree.
diff --git a/src/qemu/qemu_capabilities.c b/src/qemu/qemu_capabilities.c index 2ca5af3..49a3b60 100644 --- a/src/qemu/qemu_capabilities.c +++ b/src/qemu/qemu_capabilities.c @@ -509,6 +509,7 @@ VIR_ENUM_IMPL(virQEMUCaps, QEMU_CAPS_LAST, "vfio-pci.display", "blockdev", "vfio-ap", + "qcow2.l2-cache-size",
When you do update, you will need to fix the comma-less entry for "egl-headless.rendernode" as well, unless someone else gets to it first.
);
@@ -4117,6 +4118,10 @@ virQEMUCapsInitQMPMonitor(virQEMUCapsPtr qemuCaps, virQEMUCapsSet(qemuCaps, QEMU_CAPS_MACHINE_PSERIES_MAX_CPU_COMPAT); }
+ /* l2-cache-size before 3001000 does not accept INT64_MAX */ + if (qemuCaps->version >= 3001000) + virQEMUCapsSet(qemuCaps, QEMU_CAPS_QCOW2_L2_CACHE_SIZE); +
Not a fan of this.
The 'l2-cache-size' was supported since QEMU 2.2. A "newer" algorithm for cache adjustment has been supported since QEMU 2.12 (and there's l2-cache-entry-size that "could be used" to know that). Then there's some unreferenced commit indicating usage of INT64_MAX. Tracking that down, I find qemu.git commit 6c1c8d5d from Max which enforces MAX_INT.
This is a failure rather than enforcement. And AFAIU the code that limits the cache to an appropriate maximum when INT64_MAX is given in read_cache_sizes:
*l2_cache_size = MIN(max_l2_cache, l2_cache_max_setting);
only appeared after release of 3.0 in
commit b749562d9822d14ef69c9eaa5f85903010b86c30 Author: Leonid Bloch <lbloch@janustech.com> Date: Wed Sep 26 19:04:43 2018 +0300
qcow2: Assign the L2 cache relatively to the image size
i.e. setting the cache size to INT64_MAX before 3.1 will fail. In other words, in earlier versions there was no value to specify that the cache size needs to be set to the maximum. You can calculate this value yourself knowing the disk size and disk cluster size and set it, but this is not convenient.
So prior to this patch the max could be 32 MB? And now it's calculate-able? Do you think it would be "worthwhile" to add logic that knows QEMU 2.12 and 3.0 could still support using l2_cache_size, but with a lower value, say 32MB? Then if we find 3.1 installed we can supply INT_MAX for the value? Oh, and if the "maximum" attribute isn't set, then the default of 0 is used. Thus being able to use "Z" instead of "I" for patch 3? BTW: One issue with providing a numerical QEMU version for something is that someone could backport that patch (and its dependencies) into some maintenance branch or even downstream repo that won't report as 3.1, but would have the patch. But since libvirt would only accept 3.1 or later, it wouldn't allow usage.
Still does that really matter? Let QEMU pick their own max and don't have libvirt be the arbiter of that (like I noted in my 1/4 review). My reasoning is that there's been quite a few "adjustments" to the algorithms along the way. Those adjustments are one of the concerns voiced in the past why making any "semi-permanent" change to the value in some libvirt XML format has been NACKed historically. THus by placing boundaries that are true today we limit ourselves for the future.
IIUC setting INT64_MAX is ok in this respect. It is not a real value for the cache but rather an instruction for QEMU to pick one.
Right, providing some unrealistically large value would appear to work. What that value "could be" for 2.12 and 3.0.0, which do support some level of alteration, would be 'nice' to include in the mix, but it's not required I suppose. And yes, I do have some downstream representation in mind while thinking about this - it's in beta now ;-). John
BTW: If 3.1 was the "base" from which you want to work, then adjustments to the tests/qemucapabilitiesdata/*3_1*{.replies|.xml } files would be required as evidenced by your patch 4. The *.xml file would need to have the correct <version>3001000</version> setting which should allow patch4 to be merged into patch3.
Yeah, but 3.1 is not yet released, and I also need blockdev, as the patch only makes a difference in that case.
Nikolay
John
if (virQEMUCapsProbeQMPCommands(qemuCaps, mon) < 0) goto cleanup;
diff --git a/src/qemu/qemu_capabilities.h b/src/qemu/qemu_capabilities.h index 6bb9a2c..bb2ac17 100644 --- a/src/qemu/qemu_capabilities.h +++ b/src/qemu/qemu_capabilities.h @@ -493,6 +493,7 @@ typedef enum { /* virQEMUCapsFlags grouping marker for syntax-check */ QEMU_CAPS_VFIO_PCI_DISPLAY, /* -device vfio-pci.display */ QEMU_CAPS_BLOCKDEV, /* -blockdev and blockdev-add are supported */ QEMU_CAPS_DEVICE_VFIO_AP, /* -device vfio-ap */ + QEMU_CAPS_QCOW2_L2_CACHE_SIZE, /* -blockdev supports l2-cache-size with INT64_MAX value */
QEMU_CAPS_LAST /* this must always be the last item */ } virQEMUCapsFlags;

On 11.12.2018 19:41, John Ferlan wrote:
On 12/11/18 6:04 AM, Nikolay Shirokovskiy wrote:
On 10.12.2018 19:58, John Ferlan wrote:
On 11/8/18 8:02 AM, Nikolay Shirokovskiy wrote:
For qemu capable of setting l2-cache-size for qcow2 images to INT64_MAX and semantics of upper limit on l2 cache size. We can only check this by qemu version (3.1.0) now.
Signed-off-by: Nikolay Shirokovskiy <nshirokovskiy@virtuozzo.com> --- src/qemu/qemu_capabilities.c | 5 +++++ src/qemu/qemu_capabilities.h | 1 + 2 files changed, 6 insertions(+)
This patch needs to be updated to top of tree.
diff --git a/src/qemu/qemu_capabilities.c b/src/qemu/qemu_capabilities.c index 2ca5af3..49a3b60 100644 --- a/src/qemu/qemu_capabilities.c +++ b/src/qemu/qemu_capabilities.c @@ -509,6 +509,7 @@ VIR_ENUM_IMPL(virQEMUCaps, QEMU_CAPS_LAST, "vfio-pci.display", "blockdev", "vfio-ap", + "qcow2.l2-cache-size",
When you do update, you will need to fix the comma-less entry for "egl-headless.rendernode" as well, unless someone else gets to it first.
);
@@ -4117,6 +4118,10 @@ virQEMUCapsInitQMPMonitor(virQEMUCapsPtr qemuCaps, virQEMUCapsSet(qemuCaps, QEMU_CAPS_MACHINE_PSERIES_MAX_CPU_COMPAT); }
+ /* l2-cache-size before 3001000 does not accept INT64_MAX */ + if (qemuCaps->version >= 3001000) + virQEMUCapsSet(qemuCaps, QEMU_CAPS_QCOW2_L2_CACHE_SIZE); +
Not a fan of this.
The 'l2-cache-size' was supported since QEMU 2.2. A "newer" algorithm for cache adjustment has been supported since QEMU 2.12 (and there's l2-cache-entry-size that "could be used" to know that). Then there's some unreferenced commit indicating usage of INT64_MAX. Tracking that down, I find qemu.git commit 6c1c8d5d from Max which enforces MAX_INT.
This is a failure rather than enforcement. And AFAIU code that limit cache to appropriate maximum when INT64_MAX is given in read_cache_sizes:
*l2_cache_size = MIN(max_l2_cache, l2_cache_max_setting);
only appeared after release of 3.0 in
commit b749562d9822d14ef69c9eaa5f85903010b86c30 Author: Leonid Bloch <lbloch@janustech.com> Date: Wed Sep 26 19:04:43 2018 +0300
qcow2: Assign the L2 cache relatively to the image size
ie setting cache size to INT64_MAX before 3.1 will fail. In other words in earlier versions there were no value to specify that cache size need to be set to maximum. You can calculate this value yourself knowing disk size and disk cluster size and set it but this is not convinient.
So prior to this patch the max could be 32 MB? And now it's calculate-able?
32 MB is the default value on Linux in 3.1 (the patch set it to 1 MB but the later patch 80668d0f increases it to 32 MB). Before the patch one can set greater values but cannot set INT64_MAX. The exact value that can be set depends on the cluster size due to the check against INT_MAX you already mentioned. So one can set up to INT_MAX * cluster_size, but cluster_size is unknown to us (?). Given the minimum cluster size is 512 bytes, we can always set INT_MAX * 512 (1 TB). This will cover 64 TB disks for a 512-byte cluster size and 8 PB for the default cluster size. So we should refuse to start with policy 'maximum' and a disk size greater than 64 TB. Or maybe we can just check the cluster size at startup. What do you think? By the way the patch works only for the -blockdev configuration, which is available since QEMU 2.10 AFAIK. So we could pursue supporting the 2.10, 2.11, 2.12 and 3.0 versions in principle.
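(Checking the arithmetic with 8-byte L2 entries: INT_MAX * 512 is roughly 1 TB of cache; 1 TB / 8 * 512 bytes per cluster covers 64 TB, and 1 TB / 8 * 64 KB per cluster covers 8 PB.)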
Do you think it would be "worthwhile" to add logic that knows QEMU 2.12 and 3.0 could still support using l2_cache_size, but with a lower value say 32MB? Then if we find 3.1 installed then we can supply INT_MAX for the value? Oh, and if the "maximum" attribute isn't set, then the default of 0 is used. Thus being able to use "Z" instead of "I" for patch 3?
In the default case l2-cache-size just does not get set.
BTW: One issue with providing a numerical QEMU version for something is that someone could backport that patch (and its dependencies) into some maintenance branch or even downstream repo that won't report as 3.1, but would have the patch. But since libvirt would only accept 3.1 or later, it wouldn't allow usage.
Does not look critical. The functionality just won't work from the start, and when they start to investigate why, they can end up backporting the appropriate qemu patches as well and fixing the libvirt caps code, for example. Nikolay
Still does that really matter? Let QEMU pick their own max and don't have libvirt be the arbiter of that (like I noted in my 1/4 review). My reasoning is that there's been quite a few "adjustments" to the algorithms along the way. Those adjustments are one of the concerns voiced in the past why making any "semi-permanent" change to the value in some libvirt XML format has been NACKed historically. THus by placing boundaries that are true today we limit ourselves for the future.
IIUC setting INT64_MAX is ok in this respect. It is not real value for cache but rather order for QEMU to pick up one.
Right providing some unrealistically large value would appear to work. What that value "could be" for 2.12 and 3.0.0 which do support some level of alteration and would be 'nice' to include in the mix, but not required I suppose. And yes, I do have some downstream representation in mind by thinking about this - it's in beta now ;-).
John
BTW: If 3.1 was the "base" from which you want to work, then adjustments to the tests/qemucapabilitiesdata/*3_1*{.replies|.xml } files would be required as evidenced by your patch 4. The *.xml file would need to have the correct <version>3001000</version> setting which should allow patch4 to be merged into patch3.
Yeah, but 3.1 is not yet released and I need blockdev also as patch only makes difference in latter case.
Nikolay
John
if (virQEMUCapsProbeQMPCommands(qemuCaps, mon) < 0) goto cleanup;
diff --git a/src/qemu/qemu_capabilities.h b/src/qemu/qemu_capabilities.h index 6bb9a2c..bb2ac17 100644 --- a/src/qemu/qemu_capabilities.h +++ b/src/qemu/qemu_capabilities.h @@ -493,6 +493,7 @@ typedef enum { /* virQEMUCapsFlags grouping marker for syntax-check */ QEMU_CAPS_VFIO_PCI_DISPLAY, /* -device vfio-pci.display */ QEMU_CAPS_BLOCKDEV, /* -blockdev and blockdev-add are supported */ QEMU_CAPS_DEVICE_VFIO_AP, /* -device vfio-ap */ + QEMU_CAPS_QCOW2_L2_CACHE_SIZE, /* -blockdev supports l2-cache-size with INT64_MAX value */
QEMU_CAPS_LAST /* this must always be the last item */ } virQEMUCapsFlags;

On 12/12/18 5:59 AM, Nikolay Shirokovskiy wrote:
On 11.12.2018 19:41, John Ferlan wrote:
On 12/11/18 6:04 AM, Nikolay Shirokovskiy wrote:
On 10.12.2018 19:58, John Ferlan wrote:
On 11/8/18 8:02 AM, Nikolay Shirokovskiy wrote:
For qemu capable of setting l2-cache-size for qcow2 images to INT64_MAX and semantics of upper limit on l2 cache size. We can only check this by qemu version (3.1.0) now.
Signed-off-by: Nikolay Shirokovskiy <nshirokovskiy@virtuozzo.com> --- src/qemu/qemu_capabilities.c | 5 +++++ src/qemu/qemu_capabilities.h | 1 + 2 files changed, 6 insertions(+)
This patch needs to be updated to top of tree.
diff --git a/src/qemu/qemu_capabilities.c b/src/qemu/qemu_capabilities.c index 2ca5af3..49a3b60 100644 --- a/src/qemu/qemu_capabilities.c +++ b/src/qemu/qemu_capabilities.c @@ -509,6 +509,7 @@ VIR_ENUM_IMPL(virQEMUCaps, QEMU_CAPS_LAST, "vfio-pci.display", "blockdev", "vfio-ap", + "qcow2.l2-cache-size",
When you do update, you will need to fix the comma-less entry for "egl-headless.rendernode" as well, unless someone else gets to it first.
);
@@ -4117,6 +4118,10 @@ virQEMUCapsInitQMPMonitor(virQEMUCapsPtr qemuCaps, virQEMUCapsSet(qemuCaps, QEMU_CAPS_MACHINE_PSERIES_MAX_CPU_COMPAT); }
+ /* l2-cache-size before 3001000 does not accept INT64_MAX */ + if (qemuCaps->version >= 3001000) + virQEMUCapsSet(qemuCaps, QEMU_CAPS_QCOW2_L2_CACHE_SIZE); +
Not a fan of this.
The 'l2-cache-size' was supported since QEMU 2.2. A "newer" algorithm for cache adjustment has been supported since QEMU 2.12 (and there's l2-cache-entry-size that "could be used" to know that). Then there's some unreferenced commit indicating usage of INT64_MAX. Tracking that down, I find qemu.git commit 6c1c8d5d from Max which enforces MAX_INT.
This is a failure rather than enforcement. And AFAIU code that limit cache to appropriate maximum when INT64_MAX is given in read_cache_sizes:
*l2_cache_size = MIN(max_l2_cache, l2_cache_max_setting);
only appeared after release of 3.0 in
commit b749562d9822d14ef69c9eaa5f85903010b86c30 Author: Leonid Bloch <lbloch@janustech.com> Date: Wed Sep 26 19:04:43 2018 +0300
qcow2: Assign the L2 cache relatively to the image size
ie setting cache size to INT64_MAX before 3.1 will fail. In other words in earlier versions there were no value to specify that cache size need to be set to maximum. You can calculate this value yourself knowing disk size and disk cluster size and set it but this is not convinient.
So prior to this patch the max could be 32 MB? And now it's calculate-able?
32 MB is the default value on Linux in 3.1 (the patch set it to 1 MB but the later patch 80668d0f increases it to 32 MB).
Before the patch one can set greater values but cannot set INT64_MAX. The exact value that can be set depends on the cluster size due to the check against INT_MAX you already mentioned. So one can set up to INT_MAX * cluster_size, but cluster_size is unknown to us (?). Given the minimum cluster size is 512 bytes, we can always set INT_MAX * 512 (1 TB). This will cover 64 TB disks for a 512-byte cluster size and 8 PB for the default cluster size. So we should refuse to start with policy 'maximum' and a disk size greater than 64 TB. Or maybe we can just check the cluster size at startup. What do you think?
I'm not sure I'd go through the trouble of determining the size. If we're not changing the default cluster_size, then using 512 "seems" reasonable (whether others feel the same way who knows yet). I'm not even sure if we already get that information - feel free to research and add though. I'm not sure I would "refuse" to start - think in terms of what is the downside to not being able to have a large enough value - well that seems to be I/O performance. If we can get "better" I/O performance by providing "something" more than "default" for those environments, then I would think that's better than doing nothing or failing to start. I'm not sure how much "effort" you want to put into the window of support prior to 3.1 where "maximum" is true maximum. It could be simple enough to document that for prior to 3.1 the maximum value is limited to XXX. After 3.1 it's essentially unlimited since libvirt can provide the maximum integer value to QEMU and QEMU then decides the maximum up to that value. There's ways to wordsmith it - I'm pretty sure it's been done before, but I don't have an example readily available (search docs for size, maximum, or default)...
By the way the patch works only for the -blockdev configuration, which is available since QEMU 2.10 AFAIK. So we could pursue supporting the 2.10, 2.11, 2.12 and 3.0 versions in principle.
Ah true, the --blockdev option is required.

I've put the "artificial" point in time at 2.12 because that's when I recall Berto making adjustments to the cache size algorithms and when the @l2-cache-entry-size appeared in qapi/block-core.json. I think prior to those changes the algorithm was even more difficult to decipher.
Do you think it would be "worthwhile" to add logic that knows QEMU 2.12 and 3.0 could still support using l2_cache_size, but with a lower value say 32MB? Then if we find 3.1 installed then we can supply INT_MAX for the value? Oh, and if the "maximum" attribute isn't set, then the default of 0 is used. Thus being able to use "Z" instead of "I" for patch 3?
In the default case l2-cache-size just does not get set.
Yes on earlier support and I think using l2-cache-entry-size will give us a point in time to support from. The max size chosen is up to you - I think if we document what we set to prior to QEMU 3.1, then we'll be good.

Adding a second capability that would be (yuck) version based because we know a bug was fixed is going to have to be acceptable. Then, the logic to "set" the value would be something like:

    if (domain maximum provided) {
        if (some 3.1 capability is set)
            max = INT64_MAX;
        else if (at least 2.12 capability is set)
            max = "whatever smaller value you choose"
        else
            error this qemu doesn't support setting...
    }
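A rough C sketch of that selection, in the style of existing helpers (the function name and QEMU_CAPS_QCOW2_L2_CACHE_ENTRY_SIZE, standing in for "QEMU >= 2.12", are made up for illustration; only QEMU_CAPS_QCOW2_L2_CACHE_SIZE exists in this series):

/* Hypothetical helper, not part of the posted patches. */
static int
qemuDomainGetMaxL2CacheSize(virQEMUCapsPtr qemuCaps,
                            unsigned long long *l2CacheSize)
{
    if (virQEMUCapsGet(qemuCaps, QEMU_CAPS_QCOW2_L2_CACHE_SIZE)) {
        /* qemu >= 3.1 clamps INT64_MAX to whatever the image needs */
        *l2CacheSize = INT64_MAX;
        return 0;
    }

    if (virQEMUCapsGet(qemuCaps, QEMU_CAPS_QCOW2_L2_CACHE_ENTRY_SIZE)) {
        /* placeholder cap for 2.12/3.0; a real limit would need documenting */
        *l2CacheSize = 32 * 1024 * 1024;
        return 0;
    }

    virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
                   _("setting metadata_cache_size is not supported by "
                     "this QEMU binary"));
    return -1;
}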
BTW: One issue with providing a numerical QEMU version for something is that someone could backport that patch (and it's dependencies) into some maintenance branch or even downstream repo that won't report as 3.1, but would have the patch. But since libvirt would only accept 3.1 or later, it wouldn't allow usage.
Does not look critical. The functionality just won't work from the start, and when they start to investigate why, they can backport the appropriate qemu patches as well and fix the libvirt caps code, for example.
True, not critical, but it's one of the reasons numerical version checking is disliked. Similarly providing in docs that something is supported as of QEMU maj.min will get "dinged" with the reasoning of what if someone backports... and I have a similar response to yours for that.

John
Nikolay
Still, does that really matter? Let QEMU pick their own max and don't have libvirt be the arbiter of that (like I noted in my 1/4 review). My reasoning is that there have been quite a few "adjustments" to the algorithms along the way. Those adjustments are one of the concerns voiced in the past about why making any "semi-permanent" change to the value in some libvirt XML format has been NACKed historically. Thus by placing boundaries that are true today we limit ourselves for the future.
IIUC setting INT64_MAX is ok in this respect. It is not a real value for the cache but rather an order for QEMU to pick one.
Right, providing some unrealistically large value would appear to work. What that value "could be" for 2.12 and 3.0.0, which do support some level of alteration, would be 'nice' to include in the mix, but not required I suppose. And yes, I do have some downstream representation in mind by thinking about this - it's in beta now ;-).
John
BTW: If 3.1 was the "base" from which you want to work, then adjustments to the tests/qemucapabilitiesdata/*3_1*{.replies|.xml } files would be required as evidenced by your patch 4. The *.xml file would need to have the correct <version>3001000</version> setting which should allow patch4 to be merged into patch3.
Yeah, but 3.1 is not yet released, and I need blockdev too, as the patch only makes a difference in the latter case.
Nikolay
John
     if (virQEMUCapsProbeQMPCommands(qemuCaps, mon) < 0)
         goto cleanup;
diff --git a/src/qemu/qemu_capabilities.h b/src/qemu/qemu_capabilities.h
index 6bb9a2c..bb2ac17 100644
--- a/src/qemu/qemu_capabilities.h
+++ b/src/qemu/qemu_capabilities.h
@@ -493,6 +493,7 @@ typedef enum { /* virQEMUCapsFlags grouping marker for syntax-check */
     QEMU_CAPS_VFIO_PCI_DISPLAY, /* -device vfio-pci.display */
     QEMU_CAPS_BLOCKDEV, /* -blockdev and blockdev-add are supported */
     QEMU_CAPS_DEVICE_VFIO_AP, /* -device vfio-ap */
+    QEMU_CAPS_QCOW2_L2_CACHE_SIZE, /* -blockdev supports l2-cache-size with INT64_MAX value */
     QEMU_CAPS_LAST /* this must always be the last item */
 } virQEMUCapsFlags;

On 12.12.2018 17:28, John Ferlan wrote:
On 12/12/18 5:59 AM, Nikolay Shirokovskiy wrote:
On 11.12.2018 19:41, John Ferlan wrote:
On 12/11/18 6:04 AM, Nikolay Shirokovskiy wrote:
On 10.12.2018 19:58, John Ferlan wrote:
On 11/8/18 8:02 AM, Nikolay Shirokovskiy wrote:
For qemu capable of setting l2-cache-size for qcow2 images to INT64_MAX, with the semantics of an upper limit on the l2 cache size. We can only check this by qemu version (3.1.0) now.
Signed-off-by: Nikolay Shirokovskiy <nshirokovskiy@virtuozzo.com> --- src/qemu/qemu_capabilities.c | 5 +++++ src/qemu/qemu_capabilities.h | 1 + 2 files changed, 6 insertions(+)
This patch needs to be updated to top of tree.
[...]
Ok, I'll check what is possible to support without too much effort. Hmm, looks like @l2-cache-size is available from much earlier versions...

# @l2-cache-size: the maximum size of the L2 table cache in
#                 bytes (since 2.2)

Nikolay
[...]

On 12.12.2018 17:28, John Ferlan wrote:
On 12/12/18 5:59 AM, Nikolay Shirokovskiy wrote:
On 11.12.2018 19:41, John Ferlan wrote:
On 12/11/18 6:04 AM, Nikolay Shirokovskiy wrote:
On 10.12.2018 19:58, John Ferlan wrote:
On 11/8/18 8:02 AM, Nikolay Shirokovskiy wrote:
[...]
UPDATE

Turns out -blockdev will be supported only starting from QEMU 3.0.0 according to [1]. I thought 2.10 and later versions would be supported too because I grepped for 'qdev' in tests/qemucapabilitiesdata/ - turns out I found support for the query-block command :)

So I think extra complexity to support just 3.0.0 is unnecessary.

Nikolay

[1] [PATCHv2 62/62] DO NOT APPLY: Enable QEMU_CAPS_BLOCKDEV if 'query-blockstats' works with -blockdev https://www.redhat.com/archives/libvir-list/2018-August/msg00755.html

--- a/src/qemu/qemu_capabilities.c
+++ b/src/qemu/qemu_capabilities.c
@@ -1307,6 +1307,7 @@ static struct virQEMUCapsStringFlags virQEMUCapsQMPSchemaQueries[] = {
     { "blockdev-add/arg-type/+qcow2/encrypt/+luks/key-secret", QEMU_CAPS_QCOW2_LUKS },
     { "nbd-server-start/arg-type/tls-creds", QEMU_CAPS_NBD_TLS },
     { "screendump/arg-type/device", QEMU_CAPS_SCREENDUMP_DEVICE },
+    { "query-blockstats/ret-type/qdev", QEMU_CAPS_BLOCKDEV },
 };
[...]

[...]
Since I see you pinged on public IRC - I'm OK with using -blockdev as the base - it's the "implementation detail" that perhaps can be noted when deciding whether to apply/support the cache size. I think then the only 'difference' would be the QEMU 3.1 change that allows for a more true maximum value.

Since that "detail" would/should be hidden from consumers, maybe you just use that QEMU_CAPS_QCOW2_L2_CACHE_SIZE value to determine whether some arbitrary but algorithmically derived value is used, or, if the cap is there, just use INT64_MAX. And by the former I mean: consider what the default values are, do some math and use that as the max with the appropriate explanation.

John

Also just so you're aware - like many other Red Hat associates, I'm "technically" on vacation until after Jan 1. It's our annual shutdown period and while
[...]

Just set l2-cache-size to INT64_MAX for all format nodes of qcow2 type in block node graph.

-drive configuration is not supported because we can not set l2 cache size down the backing chain in this case.

Note that imlementation sets l2-cache-size and not cache-size.

Unfortunately at time of this patch setting cache-size to INT64_MAX fails and as guest performance depends only on l2 cache size and not refcount cache size (which is documented in recent qemu) we can set l2 directly.

Signed-off-by: Nikolay Shirokovskiy <nshirokovskiy@virtuozzo.com>
---
 src/qemu/qemu_block.c     |  5 ++++-
 src/qemu/qemu_command.c   | 23 +++++++++++++++++++++++
 src/qemu/qemu_domain.c    |  2 ++
 src/util/virstoragefile.c |  1 +
 src/util/virstoragefile.h |  1 +
 5 files changed, 31 insertions(+), 1 deletion(-)

diff --git a/src/qemu/qemu_block.c b/src/qemu/qemu_block.c
index 5321dda..8771cc1 100644
--- a/src/qemu/qemu_block.c
+++ b/src/qemu/qemu_block.c
@@ -1322,7 +1322,6 @@ qemuBlockStorageSourceGetFormatQcow2Props(virStorageSourcePtr src,
  * 'pass-discard-snapshot'
  * 'pass-discard-other'
  * 'overlap-check'
- * 'l2-cache-size'
  * 'l2-cache-entry-size'
  * 'refcount-cache-size'
  * 'cache-clean-interval'
@@ -1331,6 +1330,10 @@ qemuBlockStorageSourceGetFormatQcow2Props(virStorageSourcePtr src,
     if (qemuBlockStorageSourceGetFormatQcowGenericProps(src, "qcow2", props) < 0)
         return -1;
 
+    if (src->metadata_cache_size == VIR_DOMAIN_DISK_METADATA_CACHE_SIZE_MAXIMUM &&
+        virJSONValueObjectAdd(props, "I:l2-cache-size", INT64_MAX, NULL) < 0)
+        return -1;
+
     return 0;
 }

diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c
index f59cbf5..12b2c8d 100644
--- a/src/qemu/qemu_command.c
+++ b/src/qemu/qemu_command.c
@@ -1330,6 +1330,20 @@ qemuCheckDiskConfig(virDomainDiskDefPtr disk,
         return -1;
     }
 
+    if (disk->metadata_cache_size) {
+        if (disk->src->format != VIR_STORAGE_FILE_QCOW2) {
+            virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
+                           _("metadata_cache_size can only be set for qcow2 disks"));
+            return -1;
+        }
+
+        if (disk->metadata_cache_size != VIR_DOMAIN_DISK_METADATA_CACHE_SIZE_MAXIMUM) {
+            virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
+                           _("metadata_cache_size can only be set to 'maximum'"));
+            return -1;
+        }
+    }
+
     if (qemuCaps) {
         if (disk->serial &&
             disk->bus == VIR_DOMAIN_DISK_BUS_SCSI &&
@@ -1353,6 +1367,15 @@ qemuCheckDiskConfig(virDomainDiskDefPtr disk,
                            _("detect_zeroes is not supported by this QEMU binary"));
             return -1;
         }
+
+        if (disk->metadata_cache_size &&
+            !(virQEMUCapsGet(qemuCaps, QEMU_CAPS_BLOCKDEV) &&
+              virQEMUCapsGet(qemuCaps, QEMU_CAPS_QCOW2_L2_CACHE_SIZE))) {
+            virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
+                           _("setting metadata_cache_size is not supported by "
+                             "this QEMU binary"));
+            return -1;
+        }
     }
 
     if (disk->serial &&

diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c
index 045a7b4..23d9348 100644
--- a/src/qemu/qemu_domain.c
+++ b/src/qemu/qemu_domain.c
@@ -9074,6 +9074,7 @@ qemuDomainDiskChangeSupported(virDomainDiskDefPtr disk,
     /* "snapshot" is a libvirt internal field and thus can be changed */
     /* startupPolicy is allowed to be updated. Therefore not checked here. */
     CHECK_EQ(transient, "transient", true);
+    CHECK_EQ(metadata_cache_size, "metadata_cache_size", true);
 
     /* Note: For some address types the address auto generation for
      * @disk has still not happened at this point (e.g. driver
@@ -13244,6 +13245,7 @@ qemuDomainPrepareDiskSourceData(virDomainDiskDefPtr disk,
     src->iomode = disk->iomode;
     src->cachemode = disk->cachemode;
     src->discard = disk->discard;
+    src->metadata_cache_size = disk->metadata_cache_size;
 
     if (disk->device == VIR_DOMAIN_DISK_DEVICE_FLOPPY)
         src->floppyimg = true;

diff --git a/src/util/virstoragefile.c b/src/util/virstoragefile.c
index 94c32d8..9089e2f 100644
--- a/src/util/virstoragefile.c
+++ b/src/util/virstoragefile.c
@@ -2210,6 +2210,7 @@ virStorageSourceCopy(const virStorageSource *src,
     ret->cachemode = src->cachemode;
     ret->discard = src->discard;
     ret->detect_zeroes = src->detect_zeroes;
+    ret->metadata_cache_size = src->metadata_cache_size;
 
     /* storage driver metadata are not copied */
     ret->drv = NULL;

diff --git a/src/util/virstoragefile.h b/src/util/virstoragefile.h
index 3ff6c4f..8b57399 100644
--- a/src/util/virstoragefile.h
+++ b/src/util/virstoragefile.h
@@ -331,6 +331,7 @@ struct _virStorageSource {
     int cachemode; /* enum virDomainDiskCache */
     int discard; /* enum virDomainDiskDiscard */
     int detect_zeroes; /* enum virDomainDiskDetectZeroes */
+    int metadata_cache_size; /* enum virDomainDiskImageMetadataCacheSize */
 
     bool floppyimg; /* set to true if the storage source is going
                        to be used as a source for floppy drive */

-- 
1.8.3.1
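For context, a disk definition exercising the new attribute would look roughly like this (a minimal sketch; the image path is taken from the test .args in patch 4, the rest of the element is made up, and the actual test XML files listed in the diffstat are not reproduced in this thread):

<disk type='file' device='disk'>
  <driver name='qemu' type='qcow2' metadata_cache_size='maximum'/>
  <source file='/var/lib/libvirt/images/f14.img'/>
  <target dev='vda' bus='virtio'/>
</disk>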

On 11/8/18 8:02 AM, Nikolay Shirokovskiy wrote:
Just set l2-cache-size to INT64_MAX for all format nodes of qcow2 type in block node graph.
-drive configuration is not supported because we can not set l2 cache size down the backing chain in this case.
Note that imlementation sets l2-cache-size and not cache-size.
implementation
Unfortunately at time of this patch setting cache-size to INT64_MAX fails and as guest performance depends only on l2 cache size and not refcount cache size (which is documented in recent qemu) we can set l2 directly.
More fodder for the let's not take the all or nothing approach. Say nothing of introducing cache-size and refcount-cache-size terminology in a commit message when I believe it'd be better in code...
Signed-off-by: Nikolay Shirokovskiy <nshirokovskiy@virtuozzo.com> --- src/qemu/qemu_block.c | 5 ++++- src/qemu/qemu_command.c | 23 +++++++++++++++++++++++ src/qemu/qemu_domain.c | 2 ++ src/util/virstoragefile.c | 1 + src/util/virstoragefile.h | 1 + 5 files changed, 31 insertions(+), 1 deletion(-)
diff --git a/src/qemu/qemu_block.c b/src/qemu/qemu_block.c index 5321dda..8771cc1 100644 --- a/src/qemu/qemu_block.c +++ b/src/qemu/qemu_block.c @@ -1322,7 +1322,6 @@ qemuBlockStorageSourceGetFormatQcow2Props(virStorageSourcePtr src, * 'pass-discard-snapshot' * 'pass-discard-other' * 'overlap-check' - * 'l2-cache-size' * 'l2-cache-entry-size' * 'refcount-cache-size' * 'cache-clean-interval' @@ -1331,6 +1330,10 @@ qemuBlockStorageSourceGetFormatQcow2Props(virStorageSourcePtr src, if (qemuBlockStorageSourceGetFormatQcowGenericProps(src, "qcow2", props) < 0) return -1;
I think this is where you indicate why l2-cache-size is only being used and what "downside" there is to adding 'cache-size' and/or 'refcount-cache-size'. If I'm reading code, it's a lot "nicer" to find that information when reading rather than having to go find the commit where this was added and find the comment about why something wasn't added.
+    if (src->metadata_cache_size == VIR_DOMAIN_DISK_METADATA_CACHE_SIZE_MAXIMUM &&
+        virJSONValueObjectAdd(props, "I:l2-cache-size", INT64_MAX, NULL) < 0)
QEMU uses "INT_MAX" which is different than INT64_MAX by a few magnitudes - although the math in the code may help in this case. As for "I"... Maybe "Z" or "Y" would be better choices... or "U"... The json schema can accept an 'int' although read_cache_sizes seems to allow a uint64_t although perhaps limited (I didn't have the energy to follow the math). The rest of the changes could be different based on the patch1 adjustments. John
[...]

On 10.12.2018 20:00, John Ferlan wrote:
On 11/8/18 8:02 AM, Nikolay Shirokovskiy wrote:
[...]
I think this is where you indicate why l2-cache-size is only being used and what "downside" there is to adding 'cache-size' and/or 'refcount-cache-size'. If I'm reading code, it's a lot "nicer" to find that information when reading rather than having to go find the commit where this was added and find the comment about why something wasn't added.
Well, from my POV the reason we use l2-cache-size here is straightforward - we actually need to tune this parameter for performance. The refcount cache makes a difference only for snapshots (at least this is what I read from the qemu docs) and cache-size is a kind of combined cache, a convenience tool. Maybe the refcount cache still needs to be tuned in accordance with the l2 cache size, but this is not clear from the qemu description, so at least to me this place does not seem cryptic or something...
+    if (src->metadata_cache_size == VIR_DOMAIN_DISK_METADATA_CACHE_SIZE_MAXIMUM &&
+        virJSONValueObjectAdd(props, "I:l2-cache-size", INT64_MAX, NULL) < 0)

QEMU uses "INT_MAX", which is different from INT64_MAX by several orders of magnitude - although the math in the code may help in this case.

As for "I"... Maybe "Z" or "Y" would be better choices... or "U"... The json schema can accept an 'int', although read_cache_sizes seems to allow a uint64_t, albeit perhaps limited (I didn't have the energy to follow the math).
It can be any one of these I guess as we have only one value yet. Nikolay
[...]

On 12/11/18 6:24 AM, Nikolay Shirokovskiy wrote:
On 10.12.2018 20:00, John Ferlan wrote:
On 11/8/18 8:02 AM, Nikolay Shirokovskiy wrote:
[...]
Right, my point was more that putting a comment here at least acknowledges the existence of the other parameters, which were deliberately not chosen for particular reasons.
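Something along these lines in qemuBlockStorageSourceGetFormatQcow2Props would probably satisfy that; the wording is only a sketch distilled from Nikolay's explanation above, not text from the series:

    if (src->metadata_cache_size == VIR_DOMAIN_DISK_METADATA_CACHE_SIZE_MAXIMUM) {
        /* Only the L2 table cache matters for guest I/O performance; the
         * refcount cache is mostly relevant for (internal) snapshot
         * operations and 'cache-size' is just a combined convenience knob,
         * so only 'l2-cache-size' is set here.  INT64_MAX is not a literal
         * size: qemu (>= 3.1) clamps it to what the image actually needs. */
        if (virJSONValueObjectAdd(props, "I:l2-cache-size", INT64_MAX, NULL) < 0)
            return -1;
    }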
[...]
Fair enough - 'Z' would be something to consider if you think about my most recent comment and the idea of providing some value for 2.12 and 3.0 from patch 2. Otherwise, 'I' could stay. Using 'J' or 'Y' would only make sense if the user-provided value were used directly... 'U' has "interesting" QEMU side effects, as described in virJSONValueObjectAddVArgs. Mostly noted to follow up on previous comments, so that if any other changes to the algorithm were made we don't forget this spot.

John
[...]

This needs next:
- turning QEMU_CAPS_BLOCKDEV on
- adding caps data for not yet released qemu 3.1

Signed-off-by: Nikolay Shirokovskiy <nshirokovskiy@virtuozzo.com>
---
 .../qemuxml2argvdata/disk-metadata_cache_size.args | 39 ++++++++++++++++++++++
 tests/qemuxml2argvtest.c                           |  2 ++
 2 files changed, 41 insertions(+)
 create mode 100644 tests/qemuxml2argvdata/disk-metadata_cache_size.args

diff --git a/tests/qemuxml2argvdata/disk-metadata_cache_size.args b/tests/qemuxml2argvdata/disk-metadata_cache_size.args
new file mode 100644
index 0000000..ec90d2f
--- /dev/null
+++ b/tests/qemuxml2argvdata/disk-metadata_cache_size.args
@@ -0,0 +1,39 @@
+LC_ALL=C \
+PATH=/bin \
+HOME=/home/test \
+USER=test \
+LOGNAME=test \
+QEMU_AUDIO_DRV=none \
+/usr/bin/qemu-system-x86_64 \
+-name test \
+-S \
+-machine pc-0.13,accel=tcg,usb=off,dump-guest-core=off \
+-m 1024 \
+-smp 1,sockets=1,cores=1,threads=1 \
+-uuid 468404ad-d49c-40f2-9e14-02294f9c1be3 \
+-display none \
+-no-user-config \
+-nodefaults \
+-chardev socket,id=charmonitor,path=/tmp/lib/domain--1-test/monitor.sock,\
server,nowait \
+-mon chardev=charmonitor,id=monitor,mode=control \
+-rtc base=utc \
+-no-shutdown \
+-no-acpi \
+-boot menu=on \
+-device virtio-serial-pci,id=virtio-serial0,bus=pci.0,addr=0x6 \
+-usb \
+-blockdev '{"driver":"file","filename":"/var/lib/libvirt/images/f14.img",\
"node-name":"libvirt-2-storage","read-only":false,"discard":"unmap"}' \
+-blockdev '{"node-name":"libvirt-2-format","read-only":false,"driver":"qcow2",\
"l2-cache-size":9223372036854775807,"file":"libvirt-2-storage"}' \
+-device virtio-blk-pci,bus=pci.0,addr=0x4,drive=libvirt-2-format,\
id=virtio-disk0,bootindex=2 \
+-blockdev '{"driver":"file",\
"filename":"/var/lib/libvirt/Fedora-14-x86_64-Live-KDE.iso",\
"node-name":"libvirt-1-storage","read-only":true,"discard":"unmap"}' \
+-blockdev '{"node-name":"libvirt-1-format","read-only":true,"driver":"raw",\
"file":"libvirt-1-storage"}' \
+-device ide-drive,bus=ide.1,unit=0,drive=libvirt-1-format,id=ide0-1-0,\
bootindex=1 \
+-device virtio-balloon-pci,id=balloon0,bus=pci.0,addr=0x3

diff --git a/tests/qemuxml2argvtest.c b/tests/qemuxml2argvtest.c
index 39a7f1f..6f42b09 100644
--- a/tests/qemuxml2argvtest.c
+++ b/tests/qemuxml2argvtest.c
@@ -3044,6 +3044,8 @@ mymain(void)
     DO_TEST_CAPS_ARCH_LATEST("x86_64-pc-headless", "x86_64");
     DO_TEST_CAPS_ARCH_LATEST("x86_64-q35-headless", "x86_64");
 
+    DO_TEST_CAPS_LATEST("disk-metadata_cache_size");
+
     if (getenv("LIBVIRT_SKIP_CLEANUP") == NULL)
         virFileDeleteTree(fakerootdir);

-- 
1.8.3.1