[libvirt] [PATCHv2 0/5] Strip control codes from virBufferEscapeString

For https://bugzilla.redhat.com/show_bug.cgi?id=1184131 https://bugzilla.redhat.com/show_bug.cgi?id=1066564 v1: https://www.redhat.com/archives/libvir-list/2015-March/msg01529.html v2: * added patches 1-4 that should prevent us from calling virBufferEscapeString with control codes * patch 5 is unchanged Ján Tomko (5): tests: rename testStripIPv6BracketsData to testStripData Add functions dealing with control characters in strings Strip control characters from sysfs attributes Ignore storage volumes with control codes in their names Strip control codes in virBufferEscapeString src/libvirt_private.syms | 2 ++ src/node_device/node_device_udev.c | 2 ++ src/storage/storage_backend_fs.c | 6 +++++ src/util/virbuffer.c | 14 ++++++++--- src/util/virstring.c | 39 ++++++++++++++++++++++++++++++ src/util/virstring.h | 2 ++ tests/virbuftest.c | 49 ++++++++++++++++++++++++++++++++++++++ tests/virstringtest.c | 45 +++++++++++++++++++++++++++++++--- 8 files changed, 153 insertions(+), 6 deletions(-) -- 2.0.5

For reuse with other Strip* functions. --- tests/virstringtest.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/virstringtest.c b/tests/virstringtest.c index a0bfd61..9d0b438 100644 --- a/tests/virstringtest.c +++ b/tests/virstringtest.c @@ -522,14 +522,14 @@ testVirStringFreeListCount(const void *opaque ATTRIBUTE_UNUSED) } -struct testStripIPv6BracketsData { +struct testStripData { const char *string; const char *result; }; static int testStripIPv6Brackets(const void *args) { - const struct testStripIPv6BracketsData *data = args; + const struct testStripData *data = args; int ret = -1; char *res = NULL; @@ -766,7 +766,7 @@ mymain(void) #define TEST_STRIP_IPV6_BRACKETS(str, res) \ do { \ - struct testStripIPv6BracketsData stripData = { \ + struct testStripData stripData = { \ .string = str, \ .result = res, \ }; \ -- 2.0.5

On Tue, Apr 14, 2015 at 13:28:46 +0200, Ján Tomko wrote:
For reuse with other Strip* functions. --- tests/virstringtest.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-)
ACK, Peter

Add virStringHasControlChars that checks if the string has any control characters other than \t\r\n, and virStringStripControlChars that removes them in-place. --- src/libvirt_private.syms | 2 ++ src/util/virstring.c | 39 +++++++++++++++++++++++++++++++++++++++ src/util/virstring.h | 2 ++ tests/virstringtest.c | 39 +++++++++++++++++++++++++++++++++++++++ 4 files changed, 82 insertions(+) diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms index 7166283..d37a6b5 100644 --- a/src/libvirt_private.syms +++ b/src/libvirt_private.syms @@ -2134,6 +2134,7 @@ virStrdup; virStringArrayHasString; virStringFreeList; virStringFreeListCount; +virStringHasControlChars; virStringIsEmpty; virStringJoin; virStringListLength; @@ -2143,6 +2144,7 @@ virStringSortCompare; virStringSortRevCompare; virStringSplit; virStringSplitCount; +virStringStripControlChars; virStringStripIPv6Brackets; virStrncpy; virStrndup; diff --git a/src/util/virstring.c b/src/util/virstring.c index 3dad9dd..9eb48b5 100644 --- a/src/util/virstring.c +++ b/src/util/virstring.c @@ -968,3 +968,42 @@ virStringStripIPv6Brackets(char *str) str[len - 2] = '\0'; } } + +static const char control_chars[] = + "\x01\x02\x03\x04\x05\x06\x07" + "\x08" /* \t \n */ "\x0B\x0C" /* \r */ "\x0E\x0F" + "\x10\x11\x12\x13\x14\x15\x16\x17" + "\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F"; + +bool +virStringHasControlChars(const char *str) +{ + if (!str) + return false; + + return strcspn(str, control_chars) != strlen(str); +} +/** + * virStringStripControlChars: + * @str: the string to strip + * + * Modify the string in-place to remove the control characters + * in the interval: (0x01, 0x20) + */ +void +virStringStripControlChars(char *str) +{ + size_t len, i, j; + + if (!virStringHasControlChars(str)) + return; + + len = strlen(str); + for (i = 0, j = 0; i < len; i++) { + if (index(control_chars, str[i])) + continue; + + str[j++] = str[i]; + } + str[j] = '\0'; +} diff --git a/src/util/virstring.h b/src/util/virstring.h index 2ec60fa..e6dcb32 100644 --- a/src/util/virstring.h +++ b/src/util/virstring.h @@ -271,5 +271,7 @@ char *virStringReplace(const char *haystack, ATTRIBUTE_NONNULL(1) ATTRIBUTE_NONNULL(2) ATTRIBUTE_NONNULL(3); void virStringStripIPv6Brackets(char *str); +bool virStringHasControlChars(const char *str); +void virStringStripControlChars(char *str); #endif /* __VIR_STRING_H__ */ diff --git a/tests/virstringtest.c b/tests/virstringtest.c index 9d0b438..38d0126 100644 --- a/tests/virstringtest.c +++ b/tests/virstringtest.c @@ -551,6 +551,29 @@ static int testStripIPv6Brackets(const void *args) return ret; } +static int testStripControlChars(const void *args) +{ + const struct testStripData *data = args; + int ret = -1; + char *res = NULL; + + if (VIR_STRDUP(res, data->string) < 0) + goto cleanup; + + virStringStripControlChars(res); + + if (STRNEQ_NULLABLE(res, data->result)) { + fprintf(stderr, "Returned '%s', expected '%s'\n", + NULLSTR(res), NULLSTR(data->result)); + goto cleanup; + } + + ret = 0; + + cleanup: + VIR_FREE(res); + return ret; +} static int mymain(void) @@ -783,6 +806,22 @@ mymain(void) TEST_STRIP_IPV6_BRACKETS(":hello]", ":hello]"); TEST_STRIP_IPV6_BRACKETS(":[]:", ":[]:"); +#define TEST_STRIP_CONTROL_CHARS(str, res) \ + do { \ + struct testStripData stripData = { \ + .string = str, \ + .result = res, \ + }; \ + if (virtTestRun("Strip control chars from " #str, \ + testStripControlChars, &stripData) < 0) \ + ret = -1; \ + } while (0) + + TEST_STRIP_CONTROL_CHARS(NULL, NULL); + TEST_STRIP_CONTROL_CHARS("\nhello \r hello\t", "\nhello \r hello\t"); + TEST_STRIP_CONTROL_CHARS("\x01H\x02" "E\x03L\x04L\x05O", "HELLO"); + TEST_STRIP_CONTROL_CHARS("\x01\x02\x03\x04HELL\x05O", "HELLO"); + TEST_STRIP_CONTROL_CHARS("\nhello \x01\x07hello\t", "\nhello hello\t"); return ret == 0 ? EXIT_SUCCESS : EXIT_FAILURE; } -- 2.0.5

On Tue, Apr 14, 2015 at 13:28:47 +0200, Ján Tomko wrote:
Add virStringHasControlChars that checks if the string has any control characters other than \t\r\n, and virStringStripControlChars that removes them in-place. --- src/libvirt_private.syms | 2 ++ src/util/virstring.c | 39 +++++++++++++++++++++++++++++++++++++++ src/util/virstring.h | 2 ++ tests/virstringtest.c | 39 +++++++++++++++++++++++++++++++++++++++ 4 files changed, 82 insertions(+)
diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms index 7166283..d37a6b5 100644 --- a/src/libvirt_private.syms +++ b/src/libvirt_private.syms @@ -2134,6 +2134,7 @@ virStrdup; virStringArrayHasString; virStringFreeList; virStringFreeListCount; +virStringHasControlChars; virStringIsEmpty; virStringJoin; virStringListLength; @@ -2143,6 +2144,7 @@ virStringSortCompare; virStringSortRevCompare; virStringSplit; virStringSplitCount; +virStringStripControlChars; virStringStripIPv6Brackets; virStrncpy; virStrndup; diff --git a/src/util/virstring.c b/src/util/virstring.c index 3dad9dd..9eb48b5 100644 --- a/src/util/virstring.c +++ b/src/util/virstring.c @@ -968,3 +968,42 @@ virStringStripIPv6Brackets(char *str) str[len - 2] = '\0'; } } + +static const char control_chars[] = + "\x01\x02\x03\x04\x05\x06\x07" + "\x08" /* \t \n */ "\x0B\x0C" /* \r */ "\x0E\x0F"
The alignment is off
+ "\x10\x11\x12\x13\x14\x15\x16\x17" + "\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F"; + +bool +virStringHasControlChars(const char *str) +{ + if (!str) + return false; + + return strcspn(str, control_chars) != strlen(str); +}
Missing newlines between function and comment.
+/** + * virStringStripControlChars: + * @str: the string to strip + * + * Modify the string in-place to remove the control characters + * in the interval: (0x01, 0x20)
The interval is more like <0x01, 0x20).
+ */ +void +virStringStripControlChars(char *str) +{ + size_t len, i, j; + + if (!virStringHasControlChars(str)) + return;
The check above calls strlen and iterates through the string looking for the characters.
+ + len = strlen(str); + for (i = 0, j = 0; i < len; i++) { + if (index(control_chars, str[i])) + continue;
Here you iterate through the string and check for the characters ... I think it should be safe to just execute this function right away.
+ + str[j++] = str[i]; + } + str[j] = '\0'; +}
The rest looks good. ACK with the nits above addressed. Peter

On Wed, Apr 15, 2015 at 12:00:51PM +0200, Peter Krempa wrote:
On Tue, Apr 14, 2015 at 13:28:47 +0200, Ján Tomko wrote:
Add virStringHasControlChars that checks if the string has any control characters other than \t\r\n, and virStringStripControlChars that removes them in-place. --- src/libvirt_private.syms | 2 ++ src/util/virstring.c | 39 +++++++++++++++++++++++++++++++++++++++ src/util/virstring.h | 2 ++ tests/virstringtest.c | 39 +++++++++++++++++++++++++++++++++++++++ 4 files changed, 82 insertions(+)
+ +bool +virStringHasControlChars(const char *str) +{ + if (!str) + return false; + + return strcspn(str, control_chars) != strlen(str); +}
+ */ +void +virStringStripControlChars(char *str) +{ + size_t len, i, j; + + if (!virStringHasControlChars(str)) + return;
The check above calls strlen and iterates through the string looking for the characters.
Right, the check can do just: return str[strcspn(str, control_chars)] != '\0'; Jan

Including them in the XML makes them unparsable. https://bugzilla.redhat.com/show_bug.cgi?id=1184131 --- src/node_device/node_device_udev.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/node_device/node_device_udev.c b/src/node_device/node_device_udev.c index 8c39e5f..5e98222 100644 --- a/src/node_device/node_device_udev.c +++ b/src/node_device/node_device_udev.c @@ -239,6 +239,8 @@ static int udevGetStringSysfsAttr(struct udev_device *udev_device, ret = udevGetDeviceSysfsAttr(udev_device, attr_name, &tmp); + virStringStripControlChars(tmp); + if (tmp != NULL && (STREQ(tmp, ""))) { VIR_FREE(tmp); tmp = NULL; -- 2.0.5

On Tue, Apr 14, 2015 at 13:28:48 +0200, Ján Tomko wrote:
Including them in the XML makes them unparsable.
https://bugzilla.redhat.com/show_bug.cgi?id=1184131 --- src/node_device/node_device_udev.c | 2 ++ 1 file changed, 2 insertions(+)
diff --git a/src/node_device/node_device_udev.c b/src/node_device/node_device_udev.c index 8c39e5f..5e98222 100644 --- a/src/node_device/node_device_udev.c +++ b/src/node_device/node_device_udev.c @@ -239,6 +239,8 @@ static int udevGetStringSysfsAttr(struct udev_device *udev_device,
ret = udevGetDeviceSysfsAttr(udev_device, attr_name, &tmp);
+ virStringStripControlChars(tmp); +
Looks safe enoguh to me, but I'd state in a comment that this function strips possible control chars.
if (tmp != NULL && (STREQ(tmp, ""))) { VIR_FREE(tmp); tmp = NULL;
ACK with comment added. Peter

To prevent generating invalid XML. https://bugzilla.redhat.com/show_bug.cgi?id=1066564 --- src/storage/storage_backend_fs.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/storage/storage_backend_fs.c b/src/storage/storage_backend_fs.c index d4d65bc..fb33f08 100644 --- a/src/storage/storage_backend_fs.c +++ b/src/storage/storage_backend_fs.c @@ -860,6 +860,12 @@ virStorageBackendFileSystemRefresh(virConnectPtr conn ATTRIBUTE_UNUSED, while ((direrr = virDirRead(dir, &ent, pool->def->target.path)) > 0) { int ret; + if (virStringHasControlChars(ent->d_name)) { + VIR_WARN("Ignoring file with control characters in its name: '%s'", + ent->d_name); + continue; + } + if (VIR_ALLOC(vol) < 0) goto error; -- 2.0.5

On Tue, Apr 14, 2015 at 13:28:49 +0200, Ján Tomko wrote:
To prevent generating invalid XML.
https://bugzilla.redhat.com/show_bug.cgi?id=1066564 --- src/storage/storage_backend_fs.c | 6 ++++++ 1 file changed, 6 insertions(+)
diff --git a/src/storage/storage_backend_fs.c b/src/storage/storage_backend_fs.c index d4d65bc..fb33f08 100644 --- a/src/storage/storage_backend_fs.c +++ b/src/storage/storage_backend_fs.c @@ -860,6 +860,12 @@ virStorageBackendFileSystemRefresh(virConnectPtr conn ATTRIBUTE_UNUSED, while ((direrr = virDirRead(dir, &ent, pool->def->target.path)) > 0) { int ret;
+ if (virStringHasControlChars(ent->d_name)) { + VIR_WARN("Ignoring file with control characters in its name: '%s'", + ent->d_name);
I'm not quite sure that printing the string containing control characters to the log file is a good idea. While journalctl probably will handle the string properly, classic text logs may cause problems then. I'd rather log the pool name or directory path or something else than the offending name itself.
+ continue; + } + if (VIR_ALLOC(vol) < 0) goto error;
Peter

These cannot be represented in XML. We have been stripping them, but only if the string had characters that needed escaping: <>"'& Extend the strcspn check to include control codes, and strip them even if we don't do any escaping. https://bugzilla.redhat.com/show_bug.cgi?id=1184131 https://bugzilla.redhat.com/show_bug.cgi?id=1066564 --- src/util/virbuffer.c | 14 +++++++++++--- tests/virbuftest.c | 49 +++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 60 insertions(+), 3 deletions(-) diff --git a/src/util/virbuffer.c b/src/util/virbuffer.c index 706dbfa..3d13c90 100644 --- a/src/util/virbuffer.c +++ b/src/util/virbuffer.c @@ -438,6 +438,13 @@ virBufferEscapeString(virBufferPtr buf, const char *format, const char *str) int len; char *escaped, *out; const char *cur; + const char forbidden_characters[] = { + 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, + /*\t*/ /*\n*/ 0x0B, 0x0C, /*\r*/ 0x0E, 0x0F, 0x10, + 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, + 0x19, '"', '&', '\'', '<', '>', + '\0' + }; if ((format == NULL) || (buf == NULL) || (str == NULL)) return; @@ -446,7 +453,7 @@ virBufferEscapeString(virBufferPtr buf, const char *format, const char *str) return; len = strlen(str); - if (strcspn(str, "<>&'\"") == len) { + if (strcspn(str, forbidden_characters) == len) { virBufferAsprintf(buf, format, str); return; } @@ -490,8 +497,7 @@ virBufferEscapeString(virBufferPtr buf, const char *format, const char *str) *out++ = 'o'; *out++ = 's'; *out++ = ';'; - } else if (((unsigned char)*cur >= 0x20) || (*cur == '\n') || (*cur == '\t') || - (*cur == '\r')) { + } else if (!strchr(forbidden_characters, *cur)) { /* * default case, just copy ! * Note that character over 0x80 are likely to give problem @@ -499,6 +505,8 @@ virBufferEscapeString(virBufferPtr buf, const char *format, const char *str) * it's hard to handle properly we have to assume it's UTF-8 too */ *out++ = *cur; + } else { + /* silently ignore control characters */ } cur++; } diff --git a/tests/virbuftest.c b/tests/virbuftest.c index 21cb18b..10398d5 100644 --- a/tests/virbuftest.c +++ b/tests/virbuftest.c @@ -349,6 +349,39 @@ testBufAddStr(const void *opaque ATTRIBUTE_UNUSED) static int +testBufEscapeStr(const void *opaque ATTRIBUTE_UNUSED) +{ + const struct testBufAddStrData *data = opaque; + virBuffer buf = VIR_BUFFER_INITIALIZER; + char *actual; + int ret = -1; + + virBufferAddLit(&buf, "<c>\n"); + virBufferAdjustIndent(&buf, 2); + virBufferEscapeString(&buf, "<el>%s</el>\n", data->data); + virBufferAdjustIndent(&buf, -2); + virBufferAddLit(&buf, "</c>"); + + if (!(actual = virBufferContentAndReset(&buf))) { + TEST_ERROR("buf is empty"); + goto cleanup; + } + + if (STRNEQ_NULLABLE(actual, data->expect)) { + TEST_ERROR("testBufEscapeStr(): Strings don't match:\n"); + virtTestDifference(stderr, data->expect, actual); + goto cleanup; + } + + ret = 0; + + cleanup: + VIR_FREE(actual); + return ret; +} + + +static int mymain(void) { int ret = 0; @@ -379,6 +412,22 @@ mymain(void) DO_TEST_ADD_STR("<a/>\n", "<c>\n <a/>\n</c>"); DO_TEST_ADD_STR("<b>\n <a/>\n</b>\n", "<c>\n <b>\n <a/>\n </b>\n</c>"); +#define DO_TEST_ESCAPE(data, expect) \ + do { \ + struct testBufAddStrData info = { data, expect }; \ + if (virtTestRun("Buf: EscapeStr", testBufEscapeStr, &info) < 0) \ + ret = -1; \ + } while (0) + + DO_TEST_ESCAPE("<td></td><td></td>", + "<c>\n <el><td></td><td></td></el>\n</c>"); + DO_TEST_ESCAPE("\007\"&&\"\x15", + "<c>\n <el>"&&"</el>\n</c>"); + DO_TEST_ESCAPE(",,'..',,", + "<c>\n <el>,,'..',,</el>\n</c>"); + DO_TEST_ESCAPE("\x01\x01\x02\x03\x05\x08", + "<c>\n <el></el>\n</c>"); + return ret == 0 ? EXIT_SUCCESS : EXIT_FAILURE; } -- 2.0.5

On Tue, Apr 14, 2015 at 13:28:50 +0200, Ján Tomko wrote:
These cannot be represented in XML.
We have been stripping them, but only if the string had characters that needed escaping: <>"'&
Extend the strcspn check to include control codes, and strip them even if we don't do any escaping.
https://bugzilla.redhat.com/show_bug.cgi?id=1184131 https://bugzilla.redhat.com/show_bug.cgi?id=1066564 --- src/util/virbuffer.c | 14 +++++++++++--- tests/virbuftest.c | 49 +++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 60 insertions(+), 3 deletions(-)
ACK, Peter
participants (2)
-
Ján Tomko
-
Peter Krempa