[libvirt] [PATCH] esx: Handle non-UTF-8 encoded VMX files

ESX(i) uses UTF-8, but a Windows based GSX server writes Windows-1252 encoded VMX files. Add a test case to ensure that libxml2 provides Windows-1252 to UTF-8 conversion. --- This more general patch is a replacement for this patch: https://www.redhat.com/archives/libvir-list/2010-October/msg00516.html src/esx/esx_util.c | 48 +++++++++++++++++++++++++++++++++++++++++++++--- src/esx/esx_util.h | 2 ++ src/esx/esx_vmx.c | 30 ++++++++++++++++++++++++++++++ tests/esxutilstest.c | 42 ++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 119 insertions(+), 3 deletions(-) diff --git a/src/esx/esx_util.c b/src/esx/esx_util.c index 5fe72fc..24b931f 100644 --- a/src/esx/esx_util.c +++ b/src/esx/esx_util.c @@ -612,9 +612,9 @@ esxUtil_ReformatUuid(const char *input, char *output) unsigned char uuid[VIR_UUID_BUFLEN]; if (virUUIDParse(input, uuid) < 0) { - ESX_VI_ERROR(VIR_ERR_INTERNAL_ERROR, - _("Could not parse UUID from string '%s'"), - input); + ESX_ERROR(VIR_ERR_INTERNAL_ERROR, + _("Could not parse UUID from string '%s'"), + input); return -1; } @@ -819,3 +819,45 @@ esxUtil_EscapeDatastoreItem(const char *string) return escaped2; } + + + +char * +esxUtil_ConvertToUTF8(const char *encoding, const char *string) +{ + char *result = NULL; + xmlCharEncodingHandlerPtr handler; + xmlBufferPtr input; + xmlBufferPtr utf8; + + handler = xmlFindCharEncodingHandler(encoding); + + if (handler == NULL) { + ESX_ERROR(VIR_ERR_INTERNAL_ERROR, + _("libxml2 doesn't handle %s encoding"), encoding); + return NULL; + } + + input = xmlBufferCreateStatic((char *)string, strlen(string)); + utf8 = xmlBufferCreate(); + + if (xmlCharEncInFunc(handler, utf8, input) < 0) { + ESX_ERROR(VIR_ERR_INTERNAL_ERROR, + _("Could not convert from %s to UTF-8 encoding"), encoding); + goto cleanup; + } + + result = strdup((const char *)xmlBufferContent(utf8)); + + if (result == NULL) { + virReportOOMError(); + goto cleanup; + } + + cleanup: + xmlCharEncCloseFunc(handler); + xmlBufferFree(input); + xmlBufferFree(utf8); + + return result; +} diff --git a/src/esx/esx_util.h b/src/esx/esx_util.h index 669a4f2..694e935 100644 --- a/src/esx/esx_util.h +++ b/src/esx/esx_util.h @@ -89,4 +89,6 @@ void esxUtil_ReplaceSpecialWindowsPathChars(char *string); char *esxUtil_EscapeDatastoreItem(const char *string); +char *esxUtil_ConvertToUTF8(const char *encoding, const char *string); + #endif /* __ESX_UTIL_H__ */ diff --git a/src/esx/esx_vmx.c b/src/esx/esx_vmx.c index 7dc8e60..7ec8c0e 100644 --- a/src/esx/esx_vmx.c +++ b/src/esx/esx_vmx.c @@ -868,6 +868,8 @@ esxVMX_ParseConfig(esxVMX_Context *ctx, virCapsPtr caps, const char *vmx, { bool success = false; virConfPtr conf = NULL; + char *encoding = NULL; + char *utf8; virDomainDefPtr def = NULL; long long config_version = 0; long long virtualHW_version = 0; @@ -895,6 +897,33 @@ esxVMX_ParseConfig(esxVMX_Context *ctx, virCapsPtr caps, const char *vmx, return NULL; } + /* vmx:.encoding */ + if (esxUtil_GetConfigString(conf, ".encoding", &encoding, true) < 0) { + goto cleanup; + } + + if (encoding == NULL || STRCASEEQ(encoding, "UTF-8")) { + /* nothing */ + } else { + virConfFree(conf); + conf = NULL; + + utf8 = esxUtil_ConvertToUTF8(encoding, vmx); + + if (utf8 == NULL) { + goto cleanup; + } + + conf = virConfReadMem(utf8, strlen(utf8), VIR_CONF_FLAG_VMX_FORMAT); + + VIR_FREE(utf8); + + if (conf == NULL) { + goto cleanup; + } + } + + /* Allocate domain def */ if (VIR_ALLOC(def) < 0) { virReportOOMError(); return NULL; @@ -1359,6 +1388,7 @@ esxVMX_ParseConfig(esxVMX_Context *ctx, virCapsPtr caps, const char *vmx, } virConfFree(conf); + VIR_FREE(encoding); VIR_FREE(sched_cpu_affinity); VIR_FREE(guestOS); diff --git a/tests/esxutilstest.c b/tests/esxutilstest.c index d4042c2..97e154e 100644 --- a/tests/esxutilstest.c +++ b/tests/esxutilstest.c @@ -280,6 +280,47 @@ testEscapeDatastoreItem(const void *data ATTRIBUTE_UNUSED) +struct testWindows1252ToUTF8 { + const char *windows1252; + const char *utf8; +}; + +static struct testWindows1252ToUTF8 windows1252ToUTF8[] = { + { "normal", "normal" }, + { /* "A€Z" */ "A\200Z", "A\342\202\254Z" }, + { /* "Aä1ö2ü3ß4#5~6!7§8/9%Z" */ "A\3441\3662\3743\3374#5~6!7\2478/9%Z", + "A\303\2441\303\2662\303\2743\303\2374#5~6!7\302\2478/9%Z" }, + { /* "hÀÁÂÃÄÅH" */ "h\300\301\302\303\304\305H", + "h\303\200\303\201\303\202\303\203\303\204\303\205H" }, +}; + +static int +testConvertWindows1252ToUTF8(const void *data ATTRIBUTE_UNUSED) +{ + int i; + char *utf8 = NULL; + + for (i = 0; i < ARRAY_CARDINALITY(windows1252ToUTF8); ++i) { + VIR_FREE(utf8); + + utf8 = esxUtil_ConvertToUTF8("Windows-1252", + windows1252ToUTF8[i].windows1252); + + if (utf8 == NULL) { + return -1; + } + + if (STRNEQ(windows1252ToUTF8[i].utf8, utf8)) { + VIR_FREE(utf8); + return -1; + } + } + + return 0; +} + + + static int mymain(int argc, char **argv) { @@ -312,6 +353,7 @@ mymain(int argc, char **argv) DO_TEST(ParseDatastorePath); DO_TEST(ConvertDateTimeToCalendarTime); DO_TEST(EscapeDatastoreItem); + DO_TEST(ConvertWindows1252ToUTF8); return result == 0 ? EXIT_SUCCESS : EXIT_FAILURE; } -- 1.7.0.4

On 10/16/2010 12:37 PM, Matthias Bolte wrote:
ESX(i) uses UTF-8, but a Windows based GSX server writes Windows-1252 encoded VMX files.
Add a test case to ensure that libxml2 provides Windows-1252 to UTF-8 conversion. ---
+ + if (result == NULL) { + virReportOOMError(); + goto cleanup; + } + + cleanup:
Is that last goto necessary, since the label is reached next anyways? ACK with that nit addressed. -- Eric Blake eblake@redhat.com +1-801-349-2682 Libvirt virtualization library http://libvirt.org

On 10/18/2010 11:22 AM, Eric Blake wrote:
On 10/16/2010 12:37 PM, Matthias Bolte wrote:
ESX(i) uses UTF-8, but a Windows based GSX server writes Windows-1252 encoded VMX files.
Add a test case to ensure that libxml2 provides Windows-1252 to UTF-8 conversion. ---
+ + if (result == NULL) { + virReportOOMError(); + goto cleanup; + } + + cleanup:
Is that last goto necessary, since the label is reached next anyways?
Maybe this is a defensive goto, assuming that someday someone may add extra code after that if clause. And for the moment it's just a single jmp that gets optimized out. (kind of like putting a break on the last case of a switch statement. It doesn't do anything now, but may save your bacon later if Ralph Wiggum goes playing in your code ;-))

On 10/18/2010 10:22 AM, Laine Stump wrote:
+ + if (result == NULL) { + virReportOOMError(); + goto cleanup; + } + + cleanup:
Is that last goto necessary, since the label is reached next anyways?
Maybe this is a defensive goto, assuming that someday someone may add extra code after that if clause. And for the moment it's just a single jmp that gets optimized out. (kind of like putting a break on the last case of a switch statement. It doesn't do anything now, but may save your bacon later if Ralph Wiggum goes playing in your code ;-))
Fair enough :) Although one would hope that our review process exists to shield us somewhat from whatever Ralph Wiggum's coding style throws at us :) -- Eric Blake eblake@redhat.com +1-801-349-2682 Libvirt virtualization library http://libvirt.org

On Mon, Oct 18, 2010 at 10:37:31AM -0600, Eric Blake wrote:
On 10/18/2010 10:22 AM, Laine Stump wrote:
+ + if (result == NULL) { + virReportOOMError(); + goto cleanup; + } + + cleanup:
Is that last goto necessary, since the label is reached next anyways?
Maybe this is a defensive goto, assuming that someday someone may add extra code after that if clause. And for the moment it's just a single jmp that gets optimized out. (kind of like putting a break on the last case of a switch statement. It doesn't do anything now, but may save your bacon later if Ralph Wiggum goes playing in your code ;-))
Fair enough :) Although one would hope that our review process exists to shield us somewhat from whatever Ralph Wiggum's coding style throws at us :)
Well that's not my style either, but ACK, please push :-) Now I should fix libxml2 to support CP1252 event when iconv is not used (may happen on Windows), but that's a small separate issue :-) Daniel -- Daniel Veillard | libxml Gnome XML XSLT toolkit http://xmlsoft.org/ daniel@veillard.com | Rpmfind RPM search engine http://rpmfind.net/ http://veillard.com/ | virtualization library http://libvirt.org/

2010/10/19 Daniel Veillard <veillard@redhat.com>:
On Mon, Oct 18, 2010 at 10:37:31AM -0600, Eric Blake wrote:
On 10/18/2010 10:22 AM, Laine Stump wrote:
+ + if (result == NULL) { + virReportOOMError(); + goto cleanup; + } + + cleanup:
Is that last goto necessary, since the label is reached next anyways?
Maybe this is a defensive goto, assuming that someday someone may add extra code after that if clause. And for the moment it's just a single jmp that gets optimized out. (kind of like putting a break on the last case of a switch statement. It doesn't do anything now, but may save your bacon later if Ralph Wiggum goes playing in your code ;-))
Fair enough :) Although one would hope that our review process exists to shield us somewhat from whatever Ralph Wiggum's coding style throws at us :)
Well that's not my style either, but ACK, please push :-) Now I should fix libxml2 to support CP1252 event when iconv is not used (may happen on Windows), but that's a small separate issue :-)
Daniel
I just wanted to be defensive here and use the common style and emphasis that this is an error condition. But this if condition and goto gets removed now, because Daniel suggested on IRC to steal the content of the buffer instead of strdup'ing it: diff --git a/src/esx/esx_util.c b/src/esx/esx_util.c index 24b931f..9ab0f70 100644 --- a/src/esx/esx_util.c +++ b/src/esx/esx_util.c @@ -847,12 +847,8 @@ esxUtil_ConvertToUTF8(const char *encoding, const char *string) goto cleanup; } - result = strdup((const char *)xmlBufferContent(utf8)); - - if (result == NULL) { - virReportOOMError(); - goto cleanup; - } + result = (char *)utf8->content; + utf8->content = NULL; cleanup: xmlCharEncCloseFunc(handler); Thanks, pushed with this diff folded in. Matthias
participants (4)
-
Daniel Veillard
-
Eric Blake
-
Laine Stump
-
Matthias Bolte