[libvirt] [PATCH] esx: Handle name escaping properly

VMware uses a mix of percent-, pipe- and base64-encoding in different combinations in different places. Add a testcase for this. --- src/esx/README | 25 ++++ src/esx/esx_driver.c | 72 ++++++----- src/esx/esx_storage_driver.c | 42 ++++++- src/esx/esx_util.c | 198 ++++++++++++++++++++++++++++++ src/esx/esx_util.h | 18 +++ src/esx/esx_vi.c | 6 + src/esx/esx_vmx.c | 88 +++++--------- tests/esxutilstest.c | 51 ++++++++ tests/xml2vmxdata/xml2vmx-annotation.vmx | 2 +- 9 files changed, 405 insertions(+), 97 deletions(-) diff --git a/src/esx/README b/src/esx/README index 9ae93b1..b7a5865 100644 --- a/src/esx/README +++ b/src/esx/README @@ -86,3 +86,28 @@ It tries to cancel the blocked task, but this may not be possible, because there are task like the power-on task that is marked as non-cancelable. So the driver may leave blocked tasks behind if automatic question handling is disabled. + + + +Different escaping schemes used in different places +=================================================== + +A domain name in the vSphere API has [%/\] escaped as %XX (percent-encoding), +where XX is the ASCII code of the escaped char in hex. + +A domainName entry in a VMX config file is percent-encoded and has [|"] escaped +as |XX (pipe-encoding). + +A annotation entry in a VMX config file is pipe-encoded. + +A datastore item name has the special Windows path characters ["*<>:|?] +replaced by underscores (_). The result is escaped using percent-encoding and +base64-encoding. This isn't a bijective encoding. Therefore, escaped datastore +item names cannot be unescaped completely. + +For base64-encoding sequences of chars that don't match [a-zA-Z0-9'(),. _-] +are replaced by their base64 form (the padding is omitted). An encoded sequence +begins with a plus (+), ends with a minus (-) and can contain a plus (+). The +minus (-) is omitted if the string ends in a base64-encoded sequence. VMware +uses the comma (,) instead of the slash (/) in the base64 alphabet to avoid +conflicts with the slash as path separator. diff --git a/src/esx/esx_driver.c b/src/esx/esx_driver.c index e959be2..8bc3be2 100644 --- a/src/esx/esx_driver.c +++ b/src/esx/esx_driver.c @@ -2708,7 +2708,6 @@ esxListDefinedDomains(virConnectPtr conn, char **const names, int maxnames) esxVI_String *propertyNameList = NULL; esxVI_ObjectContent *virtualMachineList = NULL; esxVI_ObjectContent *virtualMachine = NULL; - esxVI_DynamicProperty *dynamicProperty = NULL; esxVI_VirtualMachinePowerState powerState; int count = 0; int i; @@ -2745,27 +2744,15 @@ esxListDefinedDomains(virConnectPtr conn, char **const names, int maxnames) continue; } - for (dynamicProperty = virtualMachine->propSet; - dynamicProperty != NULL; - dynamicProperty = dynamicProperty->_next) { - if (STREQ(dynamicProperty->name, "name")) { - if (esxVI_AnyType_ExpectType(dynamicProperty->val, - esxVI_Type_String) < 0) { - goto cleanup; - } - - names[count] = strdup(dynamicProperty->val->string); + names[count] = NULL; - if (names[count] == NULL) { - virReportOOMError(); - goto cleanup; - } - - count++; - break; - } + if (esxVI_GetVirtualMachineIdentity(virtualMachine, NULL, &names[count], + NULL) < 0) { + goto cleanup; } + ++count; + if (count >= maxnames) { break; } @@ -2864,14 +2851,18 @@ esxDomainCreateWithFlags(virDomainPtr domain, unsigned int flags) return result; } + + static int esxDomainCreate(virDomainPtr domain) { return esxDomainCreateWithFlags(domain, 0); } + + static virDomainPtr -esxDomainDefineXML(virConnectPtr conn, const char *xml ATTRIBUTE_UNUSED) +esxDomainDefineXML(virConnectPtr conn, const char *xml) { esxPrivate *priv = conn->privateData; virDomainDefPtr def = NULL; @@ -2883,6 +2874,7 @@ esxDomainDefineXML(virConnectPtr conn, const char *xml ATTRIBUTE_UNUSED) esxVMX_Data data; char *datastoreName = NULL; char *directoryName = NULL; + char *escapedName = NULL; virBuffer buffer = VIR_BUFFER_INITIALIZER; char *url = NULL; char *datastoreRelatedPath = NULL; @@ -2912,6 +2904,13 @@ esxDomainDefineXML(virConnectPtr conn, const char *xml ATTRIBUTE_UNUSED) goto cleanup; } + if (virtualMachine == NULL && + esxVI_LookupVirtualMachineByName(priv->primary, def->name, NULL, + &virtualMachine, + esxVI_Occurrence_OptionalItem) < 0) { + goto cleanup; + } + if (virtualMachine != NULL) { /* FIXME */ ESX_ERROR(VIR_ERR_INTERNAL_ERROR, "%s", @@ -2992,7 +2991,13 @@ esxDomainDefineXML(virConnectPtr conn, const char *xml ATTRIBUTE_UNUSED) virBufferAddChar(&buffer, '/'); } - virBufferURIEncodeString(&buffer, def->name); + escapedName = esxUtil_EscapeDatastoreItem(def->name); + + if (escapedName == NULL) { + goto cleanup; + } + + virBufferURIEncodeString(&buffer, escapedName); virBufferAddLit(&buffer, ".vmx?dcPath="); virBufferURIEncodeString(&buffer, priv->primary->datacenter->name); virBufferAddLit(&buffer, "&dsName="); @@ -3005,29 +3010,31 @@ esxDomainDefineXML(virConnectPtr conn, const char *xml ATTRIBUTE_UNUSED) url = virBufferContentAndReset(&buffer); + /* Check, if VMX file already exists */ + /* FIXME */ + + /* Upload VMX file */ + VIR_DEBUG("Uploading .vmx config, url='%s' vmx='%s'", url, vmx); + + if (esxVI_Context_UploadFile(priv->primary, url, vmx) < 0) { + goto cleanup; + } + + /* Register the domain */ if (directoryName != NULL) { if (virAsprintf(&datastoreRelatedPath, "[%s] %s/%s.vmx", datastoreName, - directoryName, def->name) < 0) { + directoryName, escapedName) < 0) { virReportOOMError(); goto cleanup; } } else { if (virAsprintf(&datastoreRelatedPath, "[%s] %s.vmx", datastoreName, - def->name) < 0) { + escapedName) < 0) { virReportOOMError(); goto cleanup; } } - /* Check, if VMX file already exists */ - /* FIXME */ - - /* Upload VMX file */ - if (esxVI_Context_UploadFile(priv->primary, url, vmx) < 0) { - goto cleanup; - } - - /* Register the domain */ if (esxVI_RegisterVM_Task(priv->primary, priv->primary->datacenter->vmFolder, datastoreRelatedPath, NULL, esxVI_Boolean_False, priv->primary->computeResource->resourcePool, @@ -3061,6 +3068,7 @@ esxDomainDefineXML(virConnectPtr conn, const char *xml ATTRIBUTE_UNUSED) VIR_FREE(vmx); VIR_FREE(datastoreName); VIR_FREE(directoryName); + VIR_FREE(escapedName); VIR_FREE(url); VIR_FREE(datastoreRelatedPath); esxVI_ObjectContent_Free(&virtualMachine); diff --git a/src/esx/esx_storage_driver.c b/src/esx/esx_storage_driver.c index 329020b..72e0d7a 100644 --- a/src/esx/esx_storage_driver.c +++ b/src/esx/esx_storage_driver.c @@ -915,9 +915,13 @@ esxStorageVolumeCreateXML(virStoragePoolPtr pool, const char *xmldesc, virStoragePoolDef poolDef; virStorageVolDefPtr def = NULL; char *tmp; - char *datastorePath = NULL; + char *unescapedDatastorePath = NULL; + char *unescapedDirectoryName = NULL; + char *unescapedDirectoryAndFileName = NULL; char *directoryName = NULL; + char *fileName = NULL; char *datastorePathWithoutFileName = NULL; + char *datastorePath = NULL; esxVI_FileInfo *fileInfo = NULL; esxVI_FileBackedVirtualDiskSpec *virtualDiskSpec = NULL; esxVI_ManagedObjectReference *task = NULL; @@ -995,15 +999,30 @@ esxStorageVolumeCreateXML(virStoragePoolPtr pool, const char *xmldesc, goto cleanup; } - if (virAsprintf(&datastorePath, "[%s] %s", pool->name, def->name) < 0) { + if (virAsprintf(&unescapedDatastorePath, "[%s] %s", pool->name, + def->name) < 0) { virReportOOMError(); goto cleanup; } if (def->target.format == VIR_STORAGE_FILE_VMDK) { - /* Create directory, if it doesn't exist yet */ - if (esxUtil_ParseDatastorePath(datastorePath, NULL, &directoryName, - NULL) < 0) { + /* Parse and escape datastore path */ + if (esxUtil_ParseDatastorePath(unescapedDatastorePath, NULL, + &unescapedDirectoryName, + &unescapedDirectoryAndFileName) < 0) { + goto cleanup; + } + + directoryName = esxUtil_EscapeDatastoreItem(unescapedDirectoryName); + + if (directoryName == NULL) { + goto cleanup; + } + + fileName = esxUtil_EscapeDatastoreItem(unescapedDirectoryAndFileName + + strlen(unescapedDirectoryName) + 1); + + if (fileName == NULL) { goto cleanup; } @@ -1013,6 +1032,13 @@ esxStorageVolumeCreateXML(virStoragePoolPtr pool, const char *xmldesc, goto cleanup; } + if (virAsprintf(&datastorePath, "[%s] %s/%s", pool->name, directoryName, + fileName) < 0) { + virReportOOMError(); + goto cleanup; + } + + /* Create directory, if it doesn't exist yet */ if (esxVI_LookupFileInfoByDatastorePath (priv->primary, datastorePathWithoutFileName, true, &fileInfo, esxVI_Occurrence_OptionalItem) < 0) { @@ -1115,9 +1141,13 @@ esxStorageVolumeCreateXML(virStoragePoolPtr pool, const char *xmldesc, esxVI_ObjectContent_Free(&datastore); esxVI_DatastoreInfo_Free(&datastoreInfo); virStorageVolDefFree(def); - VIR_FREE(datastorePath); + VIR_FREE(unescapedDatastorePath); + VIR_FREE(unescapedDirectoryName); + VIR_FREE(unescapedDirectoryAndFileName); VIR_FREE(directoryName); + VIR_FREE(fileName); VIR_FREE(datastorePathWithoutFileName); + VIR_FREE(datastorePath); esxVI_FileInfo_Free(&fileInfo); esxVI_FileBackedVirtualDiskSpec_Free(&virtualDiskSpec); esxVI_ManagedObjectReference_Free(&task); diff --git a/src/esx/esx_util.c b/src/esx/esx_util.c index 99cc4f6..5fe72fc 100644 --- a/src/esx/esx_util.c +++ b/src/esx/esx_util.c @@ -24,6 +24,7 @@ #include <config.h> +#include <c-ctype.h> #include <netdb.h> #include "internal.h" @@ -621,3 +622,200 @@ esxUtil_ReformatUuid(const char *input, char *output) return 0; } + + + +char * +esxUtil_EscapeHex(const char *string, char escape, const char *special) +{ + char *escaped = NULL; + size_t length = 1; /* 1 byte for termination */ + const char *tmp1 = string; + char *tmp2; + + /* Calculate length of escaped string */ + while (*tmp1 != '\0') { + if (*tmp1 == escape || strspn(tmp1, special) > 0) { + length += 2; + } + + ++tmp1; + ++length; + } + + if (VIR_ALLOC_N(escaped, length) < 0) { + virReportOOMError(); + return NULL; + } + + tmp1 = string; /* reading from this one */ + tmp2 = escaped; /* writing to this one */ + + /* Escape to 'cXX' where c is the escape char and X is a hex digit */ + while (*tmp1 != '\0') { + if (*tmp1 == escape || strspn(tmp1, special) > 0) { + *tmp2++ = escape; + + snprintf(tmp2, 3, "%02x", (unsigned int)*tmp1); + + tmp2 += 2; + } else { + *tmp2++ = *tmp1; + } + + ++tmp1; + } + + *tmp2 = '\0'; + + return escaped; +} + + + +int +esxUtil_UnescapeHex(char *string, char escape) +{ + char *tmp1 = string; /* reading from this one */ + char *tmp2 = string; /* writing to this one */ + + /* Unescape from 'cXX' where c is the escape char and X is a hex digit */ + while (*tmp1 != '\0') { + if (*tmp1 == escape) { + if (!c_isxdigit(tmp1[1]) || !c_isxdigit(tmp1[2])) { + return -1; + } + + *tmp2++ = virHexToBin(tmp1[1]) * 16 + virHexToBin(tmp1[2]); + tmp1 += 3; + } else { + *tmp2++ = *tmp1++; + } + } + + *tmp2 = '\0'; + + return 0; +} + + + +char * +esxUtil_EscapeBase64(const char *string) +{ + /* 'normal' characters don't get base64 encoded */ + static const char *normal = + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789'(),. _-"; + + /* VMware uses ',' instead of the path separator '/' in the base64 alphabet */ + static const char *base64 = + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+,"; + + virBuffer buffer = VIR_BUFFER_INITIALIZER; + const char *tmp1 = string; + size_t length; + unsigned char c1, c2, c3; + + /* Escape sequences of non-'normal' characters as base64 without padding */ + while (*tmp1 != '\0') { + length = strspn(tmp1, normal); + + if (length > 0) { + virBufferAdd(&buffer, tmp1, length); + + tmp1 += length; + } else { + length = strcspn(tmp1, normal); + + virBufferAddChar(&buffer, '+'); + + while (length > 0) { + c1 = *tmp1++; + c2 = length > 1 ? *tmp1++ : 0; + c3 = length > 2 ? *tmp1++ : 0; + + virBufferAddChar(&buffer, base64[(c1 >> 2) & 0x3f]); + virBufferAddChar(&buffer, base64[((c1 << 4) + (c2 >> 4)) & 0x3f]); + + if (length > 1) { + virBufferAddChar(&buffer, base64[((c2 << 2) + (c3 >> 6)) & 0x3f]); + } + + if (length > 2) { + virBufferAddChar(&buffer, base64[c3 & 0x3f]); + } + + length -= length > 3 ? 3 : length; + } + + if (*tmp1 != '\0') { + virBufferAddChar(&buffer, '-'); + } + } + } + + if (virBufferError(&buffer)) { + virReportOOMError(); + virBufferFreeAndReset(&buffer); + + return NULL; + } + + return virBufferContentAndReset(&buffer); +} + + + +void +esxUtil_ReplaceSpecialWindowsPathChars(char *string) +{ + /* '/' and '\\' are missing on purpose */ + static const char *specials = "\"*<>:|?"; + + char *tmp = string; + size_t length; + + while (*tmp != '\0') { + length = strspn(tmp, specials); + + while (length > 0) { + *tmp++ = '_'; + --length; + } + + if (*tmp != '\0') { + ++tmp; + } + } +} + + + +char * +esxUtil_EscapeDatastoreItem(const char *string) +{ + char *replaced = strdup(string); + char *escaped1; + char *escaped2 = NULL; + + if (replaced == NULL) { + virReportOOMError(); + return NULL; + } + + esxUtil_ReplaceSpecialWindowsPathChars(replaced); + + escaped1 = esxUtil_EscapeHexPercent(replaced); + + if (escaped1 == NULL) { + goto cleanup; + } + + escaped2 = esxUtil_EscapeBase64(escaped1); + + cleanup: + VIR_FREE(replaced); + VIR_FREE(escaped1); + + return escaped2; +} diff --git a/src/esx/esx_util.h b/src/esx/esx_util.h index 650f145..669a4f2 100644 --- a/src/esx/esx_util.h +++ b/src/esx/esx_util.h @@ -71,4 +71,22 @@ int esxUtil_GetConfigBoolean(virConfPtr conf, const char *name, bool *boolean_, int esxUtil_ReformatUuid(const char *input, char *output); +char *esxUtil_EscapeHex(const char *string, char escape, const char *special); + +# define esxUtil_EscapeHexPipe(_string) esxUtil_EscapeHex(_string, '|', "\"") + +# define esxUtil_EscapeHexPercent(_string) esxUtil_EscapeHex(_string, '%', "/\\") + +int esxUtil_UnescapeHex(char *string, char escape); + +# define esxUtil_UnescapeHexPipe(_string) esxUtil_UnescapeHex(_string, '|') + +# define esxUtil_UnescapeHexPercent(_string) esxUtil_UnescapeHex(_string, '%') + +char *esxUtil_EscapeBase64(const char *string); + +void esxUtil_ReplaceSpecialWindowsPathChars(char *string); + +char *esxUtil_EscapeDatastoreItem(const char *string); + #endif /* __ESX_UTIL_H__ */ diff --git a/src/esx/esx_vi.c b/src/esx/esx_vi.c index d361f01..c450730 100644 --- a/src/esx/esx_vi.c +++ b/src/esx/esx_vi.c @@ -2014,6 +2014,12 @@ esxVI_GetVirtualMachineIdentity(esxVI_ObjectContent *virtualMachine, goto failure; } + if (esxUtil_UnescapeHexPercent(*name) < 0) { + ESX_VI_ERROR(VIR_ERR_INTERNAL_ERROR, "%s", + _("Domain name contains invalid escape sequence")); + goto failure; + } + break; } } diff --git a/src/esx/esx_vmx.c b/src/esx/esx_vmx.c index ece16b2..e17e1e7 100644 --- a/src/esx/esx_vmx.c +++ b/src/esx/esx_vmx.c @@ -876,8 +876,6 @@ esxVMX_ParseConfig(esxVMX_Context *ctx, virCapsPtr caps, const char *vmx, long long numvcpus = 0; char *sched_cpu_affinity = NULL; char *guestOS = NULL; - char *tmp1; - char *tmp2; int controller; int bus; int port; @@ -982,34 +980,28 @@ esxVMX_ParseConfig(esxVMX_Context *ctx, virCapsPtr caps, const char *vmx, goto cleanup; } + if (def->name != NULL) { + if (esxUtil_UnescapeHexPercent(def->name) < 0 || + esxUtil_UnescapeHexPipe(def->name) < 0) { + ESX_ERROR(VIR_ERR_INTERNAL_ERROR, "%s", + _("VMX entry 'name' contains invalid escape sequence")); + goto cleanup; + } + } + /* vmx:annotation -> def:description */ if (esxUtil_GetConfigString(conf, "annotation", &def->description, true) < 0) { goto cleanup; } - /* Unescape '|XX' where X is a hex digit */ if (def->description != NULL) { - tmp1 = def->description; /* reading from this one */ - tmp2 = def->description; /* writing to this one */ - - while (*tmp1 != '\0') { - if (*tmp1 == '|') { - if (!c_isxdigit(tmp1[1]) || !c_isxdigit(tmp1[2])) { - ESX_ERROR(VIR_ERR_INTERNAL_ERROR, "%s", - _("VMX entry 'annotation' contains invalid " - "escape sequence")); - goto cleanup; - } - - *tmp2++ = virHexToBin(tmp1[1]) * 16 + virHexToBin(tmp1[2]); - tmp1 += 3; - } else { - *tmp2++ = *tmp1++; - } + if (esxUtil_UnescapeHexPipe(def->description) < 0) { + ESX_ERROR(VIR_ERR_INTERNAL_ERROR, "%s", + _("VMX entry 'annotation' contains invalid escape " + "sequence")); + goto cleanup; } - - *tmp2 = '\0'; } /* vmx:memsize -> def:mem.max_balloon */ @@ -2460,9 +2452,8 @@ esxVMX_FormatConfig(esxVMX_Context *ctx, virCapsPtr caps, virDomainDefPtr def, int sched_cpu_affinity_length; unsigned char zero[VIR_UUID_BUFLEN]; virBuffer buffer = VIR_BUFFER_INITIALIZER; - size_t length; - char *tmp1; - char *tmp2; + char *preliminaryDisplayName = NULL; + char *displayName = NULL; char *annotation = NULL; bool scsi_present[4] = { false, false, false, false }; int scsi_virtualDev[4] = { -1, -1, -1, -1 }; @@ -2536,44 +2527,23 @@ esxVMX_FormatConfig(esxVMX_Context *ctx, virCapsPtr caps, virDomainDefPtr def, } /* def:name -> vmx:displayName */ - virBufferVSprintf(&buffer, "displayName = \"%s\"\n", def->name); - - /* def:description -> vmx:annotation */ - if (def->description != NULL) { - /* Escape '"' as '|22' and '|' as '|7C' */ - length = 1; /* 1 byte for termination */ - tmp1 = def->description; - - while (*tmp1 != '\0') { - if (*tmp1 == '"' || *tmp1 == '|') { - length += 2; - } - - ++tmp1; - ++length; - } + preliminaryDisplayName = esxUtil_EscapeHexPipe(def->name); - if (VIR_ALLOC_N(annotation, length) < 0) { - virReportOOMError(); - goto cleanup; - } + if (preliminaryDisplayName == NULL) { + goto cleanup; + } - tmp1 = def->description; /* reading from this one */ - tmp2 = annotation; /* writing to this one */ + displayName = esxUtil_EscapeHexPercent(preliminaryDisplayName); - while (*tmp1 != '\0') { - if (*tmp1 == '"') { - *tmp2++ = '|'; *tmp2++ = '2'; *tmp2++ = '2'; - } else if (*tmp1 == '|') { - *tmp2++ = '|'; *tmp2++ = '7'; *tmp2++ = 'C'; - } else { - *tmp2++ = *tmp1; - } + if (displayName == NULL) { + goto cleanup; + } - ++tmp1; - } + virBufferVSprintf(&buffer, "displayName = \"%s\"\n", displayName); - *tmp2 = '\0'; + /* def:description -> vmx:annotation */ + if (def->description != NULL) { + annotation = esxUtil_EscapeHexPipe(def->description); virBufferVSprintf(&buffer, "annotation = \"%s\"\n", annotation); } @@ -2780,6 +2750,8 @@ esxVMX_FormatConfig(esxVMX_Context *ctx, virCapsPtr caps, virDomainDefPtr def, virBufferFreeAndReset(&buffer); } + VIR_FREE(preliminaryDisplayName); + VIR_FREE(displayName); VIR_FREE(annotation); return vmx; diff --git a/tests/esxutilstest.c b/tests/esxutilstest.c index 4906ad4..c9a59c7 100644 --- a/tests/esxutilstest.c +++ b/tests/esxutilstest.c @@ -227,6 +227,56 @@ testConvertDateTimeToCalendarTime(const void *data ATTRIBUTE_UNUSED) +struct testDatastoreItem { + const char *string; + const char *escaped; +}; + +static struct testDatastoreItem datastoreItems[] = { + { "normal", "normal" }, + { "Aä1ö2ü3ß4#5~6!7§8/9%Z", + "A+w6Q-1+w7Y-2+w7w-3+w58-4+Iw-5+fg-6+IQ-7+wqc-8+JQ-2f9+JQ-25Z" }, + { "Z~6!7§8/9%0#1\"2'3`4&A", + "Z+fg-6+IQ-7+wqc-8+JQ-2f9+JQ-250+Iw-1_2'3+YA-4+Jg-A" }, + { "標準語", "+5qiZ5rqW6Kqe" }, + { "!\"#$%&'()*+,-./0123456789:;<=>?", + "+IQ-_+IyQl-25+Jg-'()_+Kw-,-.+JQ-2f0123456789_+Ow-_+PQ-__" }, + { "A Z[\\]^_B", "A Z+WyU-5c+XV4-_B" }, + { "A`B@{|}~DEL", "A+YA-B+QHs-_+fX4-DEL" }, + { "hÀÁÂÃÄÅH", "h+w4DDgcOCw4PDhMOF-H" }, + { "A쿀Z", "A+7L+A-Z" }, + { "!쿀A", "+Iey,gA-A" }, + { "~~~", "+fn5+" }, + { "~~~A", "+fn5+-A" }, + { "K%U/H\\Z", "K+JQ-25U+JQ-2fH+JQ-5cZ" }, +}; + +static int +testEscapeDatastoreItem(const void *data ATTRIBUTE_UNUSED) +{ + int i; + char *escaped = NULL; + + for (i = 0; i < ARRAY_CARDINALITY(datastoreItems); ++i) { + VIR_FREE(escaped); + + escaped = esxUtil_EscapeDatastoreItem(datastoreItems[i].string); + + if (escaped == NULL) { + return -1; + } + + if (STRNEQ(datastoreItems[i].escaped, escaped)) { + VIR_FREE(escaped); + return -1; + } + } + + return 0; +} + + + static int mymain(int argc, char **argv) { @@ -258,6 +308,7 @@ mymain(int argc, char **argv) DO_TEST(DiskNameToIndex); DO_TEST(ParseDatastorePath); DO_TEST(ConvertDateTimeToCalendarTime); + DO_TEST(EscapeDatastoreItem); return result == 0 ? EXIT_SUCCESS : EXIT_FAILURE; } diff --git a/tests/xml2vmxdata/xml2vmx-annotation.vmx b/tests/xml2vmxdata/xml2vmx-annotation.vmx index 47d060d..5754c31 100644 --- a/tests/xml2vmxdata/xml2vmx-annotation.vmx +++ b/tests/xml2vmxdata/xml2vmx-annotation.vmx @@ -3,7 +3,7 @@ virtualHW.version = "4" guestOS = "other" uuid.bios = "56 4d 9b ef ac d9 b4 e0-c8 f0 ae a8 b9 10 35 15" displayName = "annotation" -annotation = "Some |7Ctext|7C to test the |22escaping|22: |7C|7C|22|22|7C|7C|22|7C Unescaped!" +annotation = "Some |7ctext|7c to test the |22escaping|22: |7c|7c|22|22|7c|7c|22|7c Unescaped!" memsize = "4" numvcpus = "1" floppy0.present = "false" -- 1.7.0.4

On Wed, Oct 13, 2010 at 11:06:44AM +0200, Matthias Bolte wrote:
VMware uses a mix of percent-, pipe- and base64-encoding in different combinations in different places.
Add a testcase for this. --- src/esx/README | 25 ++++ src/esx/esx_driver.c | 72 ++++++----- src/esx/esx_storage_driver.c | 42 ++++++- src/esx/esx_util.c | 198 ++++++++++++++++++++++++++++++ src/esx/esx_util.h | 18 +++ src/esx/esx_vi.c | 6 + src/esx/esx_vmx.c | 88 +++++--------- tests/esxutilstest.c | 51 ++++++++ tests/xml2vmxdata/xml2vmx-annotation.vmx | 2 +- 9 files changed, 405 insertions(+), 97 deletions(-)
That sounds vaguely familiar, I think I reviewed such a patch last month, right ? [...]
- virBufferURIEncodeString(&buffer, def->name); + escapedName = esxUtil_EscapeDatastoreItem(def->name); +
okay, we really need to make sure the escaping is XML compatible though
@@ -621,3 +622,200 @@ esxUtil_ReformatUuid(const char *input, char *output) [...] +char * +esxUtil_EscapeDatastoreItem(const char *string) +{ + char *replaced = strdup(string); + char *escaped1; + char *escaped2 = NULL; + + if (replaced == NULL) { + virReportOOMError(); + return NULL; + } + + esxUtil_ReplaceSpecialWindowsPathChars(replaced); + + escaped1 = esxUtil_EscapeHexPercent(replaced); + + if (escaped1 == NULL) { + goto cleanup; + } + + escaped2 = esxUtil_EscapeBase64(escaped1); +
Okay none of those may lead to a problem for XML ... [...]
--- a/tests/esxutilstest.c +++ b/tests/esxutilstest.c @@ -227,6 +227,56 @@ testConvertDateTimeToCalendarTime(const void *data ATTRIBUTE_UNUSED)
+struct testDatastoreItem { + const char *string; + const char *escaped; +}; + +static struct testDatastoreItem datastoreItems[] = { + { "normal", "normal" }, + { "Aä1ö2ü3ß4#5~6!7§8/9%Z", + "A+w6Q-1+w7Y-2+w7w-3+w58-4+Iw-5+fg-6+IQ-7+wqc-8+JQ-2f9+JQ-25Z" }, + { "Z~6!7§8/9%0#1\"2'3`4&A", + "Z+fg-6+IQ-7+wqc-8+JQ-2f9+JQ-250+Iw-1_2'3+YA-4+Jg-A" }, + { "標準語", "+5qiZ5rqW6Kqe" }, + { "!\"#$%&'()*+,-./0123456789:;<=>?", + "+IQ-_+IyQl-25+Jg-'()_+Kw-,-.+JQ-2f0123456789_+Ow-_+PQ-__" }, + { "A Z[\\]^_B", "A Z+WyU-5c+XV4-_B" }, + { "A`B@{|}~DEL", "A+YA-B+QHs-_+fX4-DEL" }, + { "hÀÁÂÃÄÅH", "h+w4DDgcOCw4PDhMOF-H" }, + { "A쿀Z", "A+7L+A-Z" }, + { "!쿀A", "+Iey,gA-A" }, + { "~~~", "+fn5+" }, + { "~~~A", "+fn5+-A" }, + { "K%U/H\\Z", "K+JQ-25U+JQ-2fH+JQ-5cZ" }, +};
Ouch :-) One question for the C purists. How do we know how characters outside of the ASCII range may be interpreted by a compiler ? The mail as received is made of one mime part, not multipart, it's defined as Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: base64 so my mail reader probably assumes that this is utf8 characters, carried as a base64 encoded chunk of ASCII. Since my terminal is displaying in UTF-8 I apparently see your patch as it's expected, but as soon as it's saved on disk, most Unix/Linux tools will interpret those character using the locale of the user, i.e. if I were to compile this in a terminal using a non-UTF-8 locale, the compiler may very well interpret this slightly differently, okay this is data so maybe it's just bytes being copied without risk, but I'm still a bit worried there. I like the idea of testing this API seriously, but at the C level shouldn't we encode those in an ASCII way to be sure ? Daniel -- Daniel Veillard | libxml Gnome XML XSLT toolkit http://xmlsoft.org/ daniel@veillard.com | Rpmfind RPM search engine http://rpmfind.net/ http://veillard.com/ | virtualization library http://libvirt.org/

On 10/13/2010 07:19 AM, Daniel Veillard wrote:
+ { "A쿀Z", "A+7L+A-Z" },
Ouch :-) One question for the C purists. How do we know how characters outside of the ASCII range may be interpreted by a compiler ?
Until C+1x is finalized, we cannot rely on the new Unicode string literals U"xxx". So, the only portable way in C89 or C99 to encode byte sequences is with octal or hex escapes, rather than via literal characters. Daniel is right that you cannot portably have 8-bit bytes in C source code, because the compilation is then locale-dependent, and will do different things depending not only on the compiler, but on the locale of the person doing the compilation. -- Eric Blake eblake@redhat.com +1-801-349-2682 Libvirt virtualization library http://libvirt.org

2010/10/13 Daniel Veillard <veillard@redhat.com>:
On Wed, Oct 13, 2010 at 11:06:44AM +0200, Matthias Bolte wrote:
VMware uses a mix of percent-, pipe- and base64-encoding in different combinations in different places.
Add a testcase for this. --- src/esx/README | 25 ++++ src/esx/esx_driver.c | 72 ++++++----- src/esx/esx_storage_driver.c | 42 ++++++- src/esx/esx_util.c | 198 ++++++++++++++++++++++++++++++ src/esx/esx_util.h | 18 +++ src/esx/esx_vi.c | 6 + src/esx/esx_vmx.c | 88 +++++--------- tests/esxutilstest.c | 51 ++++++++ tests/xml2vmxdata/xml2vmx-annotation.vmx | 2 +- 9 files changed, 405 insertions(+), 97 deletions(-)
That sounds vaguely familiar, I think I reviewed such a patch last month, right ?
I've sworn about this on IRC last week, so that's probably why it sounds familiar to you :)
[...]
- virBufferURIEncodeString(&buffer, def->name); + escapedName = esxUtil_EscapeDatastoreItem(def->name); +
okay, we really need to make sure the escaping is XML compatible though
ESX in general accepts UTF-8 and the escaping as it is doesn't add XML incompatible characters like <. But I'm not sure if the SOAP binding in the driver uses virBufferEscapeString in all critical places. During testing this I also used domain names that contained < and I didn't hit any obvious problems. I'll have to review the SOAP bindings for proper XML escaping. I think there are some places that don't escape properly, but didn't surface as problems yet.
@@ -621,3 +622,200 @@ esxUtil_ReformatUuid(const char *input, char *output) [...] +char * +esxUtil_EscapeDatastoreItem(const char *string) +{ + char *replaced = strdup(string); + char *escaped1; + char *escaped2 = NULL; + + if (replaced == NULL) { + virReportOOMError(); + return NULL; + } + + esxUtil_ReplaceSpecialWindowsPathChars(replaced); + + escaped1 = esxUtil_EscapeHexPercent(replaced); + + if (escaped1 == NULL) { + goto cleanup; + } + + escaped2 = esxUtil_EscapeBase64(escaped1); +
Okay none of those may lead to a problem for XML ...
As I said the newly added escaping doesn't create new XML related problems.
[...]
--- a/tests/esxutilstest.c +++ b/tests/esxutilstest.c @@ -227,6 +227,56 @@ testConvertDateTimeToCalendarTime(const void *data ATTRIBUTE_UNUSED)
+struct testDatastoreItem { + const char *string; + const char *escaped; +}; + +static struct testDatastoreItem datastoreItems[] = { + { "normal", "normal" }, + { "Aä1ö2ü3ß4#5~6!7§8/9%Z", + "A+w6Q-1+w7Y-2+w7w-3+w58-4+Iw-5+fg-6+IQ-7+wqc-8+JQ-2f9+JQ-25Z" }, + { "Z~6!7§8/9%0#1\"2'3`4&A", + "Z+fg-6+IQ-7+wqc-8+JQ-2f9+JQ-250+Iw-1_2'3+YA-4+Jg-A" }, + { "標準語", "+5qiZ5rqW6Kqe" }, + { "!\"#$%&'()*+,-./0123456789:;<=>?", + "+IQ-_+IyQl-25+Jg-'()_+Kw-,-.+JQ-2f0123456789_+Ow-_+PQ-__" }, + { "A Z[\\]^_B", "A Z+WyU-5c+XV4-_B" }, + { "A`B@{|}~DEL", "A+YA-B+QHs-_+fX4-DEL" }, + { "hÀÁÂÃÄÅH", "h+w4DDgcOCw4PDhMOF-H" }, + { "A쿀Z", "A+7L+A-Z" }, + { "!쿀A", "+Iey,gA-A" }, + { "~~~", "+fn5+" }, + { "~~~A", "+fn5+-A" }, + { "K%U/H\\Z", "K+JQ-25U+JQ-2fH+JQ-5cZ" }, +};
Ouch :-) One question for the C purists. How do we know how characters outside of the ASCII range may be interpreted by a compiler ? The mail as received is made of one mime part, not multipart, it's defined as
Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: base64
so my mail reader probably assumes that this is utf8 characters, carried as a base64 encoded chunk of ASCII. Since my terminal is displaying in UTF-8 I apparently see your patch as it's expected, but as soon as it's saved on disk, most Unix/Linux tools will interpret those character using the locale of the user, i.e. if I were to compile this in a terminal using a non-UTF-8 locale, the compiler may very well interpret this slightly differently, okay this is data so maybe it's just bytes being copied without risk, but I'm still a bit worried there.
I like the idea of testing this API seriously, but at the C level shouldn't we encode those in an ASCII way to be sure ?
Daniel
I didn't thought of this issue. As discussed on IRC, I replaced all non-ASCII chars with their UTF-8 encoding in octal, for example ä -> \303\244. So here is v2 attached that fixes the test case encoding issue. Matthias

On Wed, Oct 13, 2010 at 10:47:57PM +0200, Matthias Bolte wrote:
2010/10/13 Daniel Veillard <veillard@redhat.com>:
On Wed, Oct 13, 2010 at 11:06:44AM +0200, Matthias Bolte wrote:
VMware uses a mix of percent-, pipe- and base64-encoding in different combinations in different places.
Add a testcase for this. --- src/esx/README | 25 ++++ src/esx/esx_driver.c | 72 ++++++----- src/esx/esx_storage_driver.c | 42 ++++++- src/esx/esx_util.c | 198 ++++++++++++++++++++++++++++++ src/esx/esx_util.h | 18 +++ src/esx/esx_vi.c | 6 + src/esx/esx_vmx.c | 88 +++++--------- tests/esxutilstest.c | 51 ++++++++ tests/xml2vmxdata/xml2vmx-annotation.vmx | 2 +- 9 files changed, 405 insertions(+), 97 deletions(-)
That sounds vaguely familiar, I think I reviewed such a patch last month, right ?
I've sworn about this on IRC last week, so that's probably why it sounds familiar to you :)
hum, maybe I got confused then ...
+static struct testDatastoreItem datastoreItems[] = { + { "normal", "normal" }, + { /* "Aä1ö2ü3ß4#5~6!7§8/9%Z" */ + "A\303\2441\303\2662\303\2743\303\2374#5~6!7\302\2478/9%Z", + "A+w6Q-1+w7Y-2+w7w-3+w58-4+Iw-5+fg-6+IQ-7+wqc-8+JQ-2f9+JQ-25Z" }, + { /* "Z~6!7§8/9%0#1\"2'3`4&A" */ "Z~6!7\302\2478/9%0#1\"2'3`4&A", + "Z+fg-6+IQ-7+wqc-8+JQ-2f9+JQ-250+Iw-1_2'3+YA-4+Jg-A" }, + { /* "標準語" */ "\346\250\231\346\272\226\350\252\236", "+5qiZ5rqW6Kqe" }, + { "!\"#$%&'()*+,-./0123456789:;<=>?", + "+IQ-_+IyQl-25+Jg-'()_+Kw-,-.+JQ-2f0123456789_+Ow-_+PQ-__" }, + { "A Z[\\]^_B", "A Z+WyU-5c+XV4-_B" }, + { "A`B@{|}~DEL", "A+YA-B+QHs-_+fX4-DEL" }, + { /* "hÀÁÂÃÄÅH" */ "h\303\200\303\201\303\202\303\203\303\204\303\205H", + "h+w4DDgcOCw4PDhMOF-H" }, + { /* "A쿀Z" */ "A\354\277\200Z", "A+7L+A-Z" }, + { /* "!쿀A" */ "!\354\277\200A", "+Iey,gA-A" }, + { "~~~", "+fn5+" }, + { "~~~A", "+fn5+-A" }, + { "K%U/H\\Z", "K+JQ-25U+JQ-2fH+JQ-5cZ" }, + { "vvv<A\"B\"C>zzz", "vvv_A_B_C_zzz" }, +};
I assume in comments it's harmless, so ACK :-) thanks ! Daniel -- Daniel Veillard | libxml Gnome XML XSLT toolkit http://xmlsoft.org/ daniel@veillard.com | Rpmfind RPM search engine http://rpmfind.net/ http://veillard.com/ | virtualization library http://libvirt.org/

2010/10/14 Daniel Veillard <veillard@redhat.com>:
On Wed, Oct 13, 2010 at 10:47:57PM +0200, Matthias Bolte wrote:
2010/10/13 Daniel Veillard <veillard@redhat.com>:
On Wed, Oct 13, 2010 at 11:06:44AM +0200, Matthias Bolte wrote:
VMware uses a mix of percent-, pipe- and base64-encoding in different combinations in different places.
Add a testcase for this. --- src/esx/README | 25 ++++ src/esx/esx_driver.c | 72 ++++++----- src/esx/esx_storage_driver.c | 42 ++++++- src/esx/esx_util.c | 198 ++++++++++++++++++++++++++++++ src/esx/esx_util.h | 18 +++ src/esx/esx_vi.c | 6 + src/esx/esx_vmx.c | 88 +++++--------- tests/esxutilstest.c | 51 ++++++++ tests/xml2vmxdata/xml2vmx-annotation.vmx | 2 +- 9 files changed, 405 insertions(+), 97 deletions(-)
That sounds vaguely familiar, I think I reviewed such a patch last month, right ?
I've sworn about this on IRC last week, so that's probably why it sounds familiar to you :)
hum, maybe I got confused then ...
+static struct testDatastoreItem datastoreItems[] = { + { "normal", "normal" }, + { /* "Aä1ö2ü3ß4#5~6!7§8/9%Z" */ + "A\303\2441\303\2662\303\2743\303\2374#5~6!7\302\2478/9%Z", + "A+w6Q-1+w7Y-2+w7w-3+w58-4+Iw-5+fg-6+IQ-7+wqc-8+JQ-2f9+JQ-25Z" }, + { /* "Z~6!7§8/9%0#1\"2'3`4&A" */ "Z~6!7\302\2478/9%0#1\"2'3`4&A", + "Z+fg-6+IQ-7+wqc-8+JQ-2f9+JQ-250+Iw-1_2'3+YA-4+Jg-A" }, + { /* "標準語" */ "\346\250\231\346\272\226\350\252\236", "+5qiZ5rqW6Kqe" }, + { "!\"#$%&'()*+,-./0123456789:;<=>?", + "+IQ-_+IyQl-25+Jg-'()_+Kw-,-.+JQ-2f0123456789_+Ow-_+PQ-__" }, + { "A Z[\\]^_B", "A Z+WyU-5c+XV4-_B" }, + { "A`B@{|}~DEL", "A+YA-B+QHs-_+fX4-DEL" }, + { /* "hÀÁÂÃÄÅH" */ "h\303\200\303\201\303\202\303\203\303\204\303\205H", + "h+w4DDgcOCw4PDhMOF-H" }, + { /* "A쿀Z" */ "A\354\277\200Z", "A+7L+A-Z" }, + { /* "!쿀A" */ "!\354\277\200A", "+Iey,gA-A" }, + { "~~~", "+fn5+" }, + { "~~~A", "+fn5+-A" }, + { "K%U/H\\Z", "K+JQ-25U+JQ-2fH+JQ-5cZ" }, + { "vvv<A\"B\"C>zzz", "vvv_A_B_C_zzz" }, +};
I assume in comments it's harmless, so ACK :-)
thanks !
Daniel
Well, the encoding problem will still affect the comments but that should be no problem for the compiler or the result of the test case. This is how gnulib does this in its test cases. Eric linked me an example for the gnulib codebase and I just followed that example. I pushed this now. Matthias
participants (3)
-
Daniel Veillard
-
Eric Blake
-
Matthias Bolte