[libvirt] [PATCH 0/2] Add two string helper methods

As mentioned in my review, I took the first two patches from this series and polished them up ready for merge https://www.redhat.com/archives/libvir-list/2014-January/msg01067.html Hopefully this should be good enough for the vbox snapshot work to use. Changes since that posting - Added unit tests - Changed virStringSearch API to return all matches in one go, rather than need repeated calls - Rewrote virStringReplace to use virBuffer APIs Daniel P. Berrange (1): Add virStringReplace method for substring replacement Manuel VIVES (1): Add virStringSearch method for regex matching po/POTFILES.in | 1 + src/libvirt_private.syms | 2 + src/util/virstring.c | 146 ++++++++++++++++++++++++++++++++++++++++++- src/util/virstring.h | 12 ++++ tests/virstringtest.c | 159 +++++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 319 insertions(+), 1 deletion(-) -- 1.8.5.3

From: Manuel VIVES <manuel.vives@diateam.net> Add a virStringSearch method to virstring.{c,h} which performs a regex match against a string and returns the matching substrings. Signed-off-by: Daniel P. Berrange <berrange@redhat.com> --- po/POTFILES.in | 1 + src/libvirt_private.syms | 1 + src/util/virstring.c | 102 +++++++++++++++++++++++++++++++++++++++++++++++ src/util/virstring.h | 7 ++++ tests/virstringtest.c | 100 ++++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 211 insertions(+) diff --git a/po/POTFILES.in b/po/POTFILES.in index 5d02062..c565771 100644 --- a/po/POTFILES.in +++ b/po/POTFILES.in @@ -192,6 +192,7 @@ src/util/virscsi.c src/util/virsocketaddr.c src/util/virstatslinux.c src/util/virstoragefile.c +src/util/virstring.c src/util/virsysinfo.c src/util/virerror.c src/util/virerror.h diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms index ec786e4..f26190d 100644 --- a/src/libvirt_private.syms +++ b/src/libvirt_private.syms @@ -1794,6 +1794,7 @@ virStringArrayHasString; virStringFreeList; virStringJoin; virStringListLength; +virStringSearch; virStringSortCompare; virStringSortRevCompare; virStringSplit; diff --git a/src/util/virstring.c b/src/util/virstring.c index 8d0ca70..67a87d3 100644 --- a/src/util/virstring.c +++ b/src/util/virstring.c @@ -23,12 +23,14 @@ #include <stdlib.h> #include <stdio.h> +#include <regex.h> #include "c-ctype.h" #include "virstring.h" #include "viralloc.h" #include "virbuffer.h" #include "virerror.h" +#include "virlog.h" #define VIR_FROM_THIS VIR_FROM_NONE @@ -645,3 +647,103 @@ int virStringSortRevCompare(const void *a, const void *b) return strcmp(*sb, *sa); } + +/** + * virStringSearch: + * @str: string to search + * @regexp: POSIX Extended regular expression pattern used for matching + * @max_matches: maximum number of substrings to return + * @result: pointer to an array to be filled with NULL terminated list of matches + * + * Performs a POSIX extended regex search against a string and return all matching substrings. + * The @result value should be freed with virStringFreeList() when no longer + * required. + * + * @code + * char *source = "6853a496-1c10-472e-867a-8244937bd6f0 + * 773ab075-4cd7-4fc2-8b6e-21c84e9cb391 + * bbb3c75c-d60f-43b0-b802-fd56b84a4222 + * 60c04aa1-0375-4654-8d9f-e149d9885273 + * 4548d465-9891-4c34-a184-3b1c34a26aa8"; + * char **matches = NULL; + * virSearchRegex(source, + * "([a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12})", + * 3, + * &matches); + * + * // matches[0] == "4548d465-9891-4c34-a184-3b1c34a26aa8"; + * // matches[1] == "6853a496-1c10-472e-867a-8244937bd6f0"; + * // matches[2] == "773ab075-4cd7-4fc2-8b6e-21c84e9cb391" + * // matches[3] == NULL; + * + * virStringFreeList(matches); + * @endcode + * + * Returns: -1 on error, or number of matches + */ +ssize_t +virStringSearch(const char *str, + const char *regexp, + size_t max_matches, + char ***matches) +{ + regex_t re; + regmatch_t rem; + size_t nmatches = 0; + ssize_t ret = -1; + int rv = -1; + + *matches = NULL; + + VIR_DEBUG("search '%s' for '%s'", str, regexp); + + if ((rv = regcomp(&re, regexp, REG_EXTENDED)) != 0) { + virReportError(VIR_ERR_INTERNAL_ERROR, + _("Error while compiling regular expression '%s': %d"), + regexp, rv); + return -1; + } + + if (re.re_nsub != 1) { + virReportError(VIR_ERR_INTERNAL_ERROR, + _("Regular expression '%s' must have exactly 1 match group, not %zu"), + regexp, re.re_nsub); + goto cleanup; + } + + /* '*matches' must always be NULL terminated in every iteration + * of the loop, so start by allocating 1 element + */ + if (VIR_EXPAND_N(*matches, nmatches, 1) < 0) + goto cleanup; + + while ((nmatches - 1) < max_matches) { + char *match; + + if (regexec(&re, str, 1, &rem, 0) != 0) + break; + + if (VIR_EXPAND_N(*matches, nmatches, 1) < 0) + goto cleanup; + + if (VIR_STRNDUP(match, str + rem.rm_so, + rem.rm_eo - rem.rm_so) < 0) + goto cleanup; + + VIR_DEBUG("Got '%s'", match); + + (*matches)[nmatches-2] = match; + + str = str + rem.rm_eo; + } + + ret = nmatches - 1; /* don't count the trailing null */ + +cleanup: + regfree(&re); + if (ret < 0) { + virStringFreeList(*matches); + *matches = NULL; + } + return ret; +} diff --git a/src/util/virstring.h b/src/util/virstring.h index 13a6e5a..8efc80c 100644 --- a/src/util/virstring.h +++ b/src/util/virstring.h @@ -226,4 +226,11 @@ size_t virStringListLength(char **strings); int virStringSortCompare(const void *a, const void *b); int virStringSortRevCompare(const void *a, const void *b); +ssize_t virStringSearch(const char *str, + const char *regexp, + size_t max_results, + char ***matches) + ATTRIBUTE_NONNULL(1) ATTRIBUTE_NONNULL(2) ATTRIBUTE_NONNULL(4); + + #endif /* __VIR_STRING_H__ */ diff --git a/tests/virstringtest.c b/tests/virstringtest.c index c6adf9f..b8c6115 100644 --- a/tests/virstringtest.c +++ b/tests/virstringtest.c @@ -274,6 +274,70 @@ testStringSortCompare(const void *opaque ATTRIBUTE_UNUSED) } +struct stringSearchData { + const char *str; + const char *regexp; + size_t maxMatches; + size_t expectNMatches; + const char **expectMatches; + bool expectError; +}; + +static int +testStringSearch(const void *opaque ATTRIBUTE_UNUSED) +{ + const struct stringSearchData *data = opaque; + char **matches = NULL; + ssize_t nmatches; + int ret; + + nmatches = virStringSearch(data->str, data->regexp, + data->maxMatches, &matches); + + if (data->expectError) { + if (nmatches != -1) { + fprintf(stderr, "expected error on %s but got %zd matches\n", + data->str, nmatches); + goto cleanup; + } + } else { + size_t i; + + if (nmatches < 0) { + fprintf(stderr, "expected %zu matches on %s but got error\n", + data->expectNMatches, data->str); + goto cleanup; + } + + if (nmatches != data->expectNMatches) { + fprintf(stderr, "expected %zu matches on %s but got %zd\n", + data->expectNMatches, data->str, nmatches); + goto cleanup; + } + + if (virStringListLength(matches) != nmatches) { + fprintf(stderr, "expected %zu matches on %s but got %zd matches\n", + data->expectNMatches, data->str, + virStringListLength(matches)); + goto cleanup; + } + + for (i = 0; i < nmatches; i++) { + if (STRNEQ(matches[i], data->expectMatches[i])) { + fprintf(stderr, "match %zu expected '%s' but got '%s'\n", + i, data->expectMatches[i], matches[i]); + goto cleanup; + } + } + } + + ret = 0; + + cleanup: + virStringFreeList(matches); + return ret; +} + static int mymain(void) { @@ -328,6 +392,42 @@ mymain(void) if (virtTestRun("virStringSortCompare", testStringSortCompare, NULL) < 0) ret = -1; + +#define TEST_SEARCH(s, r, x, n, m, e) \ + do { \ + struct stringSearchData data = { \ + .str = s, \ + .maxMatches = x, \ + .regexp = r, \ + .expectNMatches = n, \ + .expectMatches = m, \ + .expectError = e, \ + }; \ + if (virtTestRun("virStringSearch " s, testStringSearch, &data) < 0) \ + ret = -1; \ + } while (0) + + /* error due to missing () in regexp */ + TEST_SEARCH("foo", "bar", 10, 0, NULL, true); + + /* error due to too many () in regexp */ + TEST_SEARCH("foo", "(b)(a)(r)", 10, 0, NULL, true); + + /* None matching */ + TEST_SEARCH("foo", "(bar)", 10, 0, NULL, false); + + /* Full match */ + const char *matches1[] = { "foo" }; + TEST_SEARCH("foo", "(foo)", 10, 1, matches1, false); + + /* Multi matches */ + const char *matches2[] = { "foo", "bar", "eek" }; + TEST_SEARCH("1foo2bar3eek", "([a-z]+)", 10, 3, matches2, false); + + /* Multi matches, limited returns */ + const char *matches3[] = { "foo", "bar" }; + TEST_SEARCH("1foo2bar3eek", "([a-z]+)", 2, 2, matches3, false); + return ret==0 ? EXIT_SUCCESS : EXIT_FAILURE; } -- 1.8.5.3

On 02/19/2014 09:36 PM, Daniel P. Berrange wrote:
From: Manuel VIVES <manuel.vives@diateam.net>
Add a virStringSearch method to virstring.{c,h} which performs a regex match against a string and returns the matching substrings.
Signed-off-by: Daniel P. Berrange <berrange@redhat.com> --- po/POTFILES.in | 1 + src/libvirt_private.syms | 1 + src/util/virstring.c | 102 +++++++++++++++++++++++++++++++++++++++++++++++ src/util/virstring.h | 7 ++++ tests/virstringtest.c | 100 ++++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 211 insertions(+)
@@ -645,3 +647,103 @@ int virStringSortRevCompare(const void *a, const void *b)
return strcmp(*sb, *sa); } + +/** + * virStringSearch: + * @str: string to search + * @regexp: POSIX Extended regular expression pattern used for matching + * @max_matches: maximum number of substrings to return + * @result: pointer to an array to be filled with NULL terminated list of matches + * + * Performs a POSIX extended regex search against a string and return all matching substrings. + * The @result value should be freed with virStringFreeList() when no longer
Double space before 'virStringFreeList'.
+ * required. + * + * @code + * char *source = "6853a496-1c10-472e-867a-8244937bd6f0 + * 773ab075-4cd7-4fc2-8b6e-21c84e9cb391 + * bbb3c75c-d60f-43b0-b802-fd56b84a4222 + * 60c04aa1-0375-4654-8d9f-e149d9885273 + * 4548d465-9891-4c34-a184-3b1c34a26aa8"; + * char **matches = NULL; + * virSearchRegex(source,
The function is now renamed.
+ * "([a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12})", + * 3, + * &matches); + * + * // matches[0] == "4548d465-9891-4c34-a184-3b1c34a26aa8"; + * // matches[1] == "6853a496-1c10-472e-867a-8244937bd6f0"; + * // matches[2] == "773ab075-4cd7-4fc2-8b6e-21c84e9cb391" + * // matches[3] == NULL;
And these matches are wrong.
+ * + * virStringFreeList(matches); + * @endcode + * + * Returns: -1 on error, or number of matches + */ +ssize_t +virStringSearch(const char *str, + const char *regexp, + size_t max_matches, + char ***matches) +{ + regex_t re; + regmatch_t rem; + size_t nmatches = 0; + ssize_t ret = -1; + int rv = -1; + + *matches = NULL; + + VIR_DEBUG("search '%s' for '%s'", str, regexp); + + if ((rv = regcomp(&re, regexp, REG_EXTENDED)) != 0) { + virReportError(VIR_ERR_INTERNAL_ERROR, + _("Error while compiling regular expression '%s': %d"), + regexp, rv);
Using 'regerror' instead of a numeric error code would be nicer.
+ return -1; + } + + if (re.re_nsub != 1) { + virReportError(VIR_ERR_INTERNAL_ERROR, + _("Regular expression '%s' must have exactly 1 match group, not %zu"), + regexp, re.re_nsub); + goto cleanup; + } + + /* '*matches' must always be NULL terminated in every iteration + * of the loop, so start by allocating 1 element + */ + if (VIR_EXPAND_N(*matches, nmatches, 1) < 0) + goto cleanup; +
diff --git a/tests/virstringtest.c b/tests/virstringtest.c index c6adf9f..b8c6115 100644 --- a/tests/virstringtest.c +++ b/tests/virstringtest.c @@ -274,6 +274,70 @@ testStringSortCompare(const void *opaque ATTRIBUTE_UNUSED) }
+struct stringSearchData { + const char *str; + const char *regexp; + size_t maxMatches; + size_t expectNMatches; + const char **expectMatches; + bool expectError; +}; + +static int +testStringSearch(const void *opaque ATTRIBUTE_UNUSED) +{ + const struct stringSearchData *data = opaque; + char **matches = NULL; + ssize_t nmatches; + int ret;
ret = -1; ACK with or without regerror. Jan

Add a virStringReplace method to virstring.{h,c} to perform substring matching and replacement Signed-off-by: Daniel P. Berrange <berrange@redhat.com> --- src/libvirt_private.syms | 1 + src/util/virstring.c | 44 +++++++++++++++++++++++++++++++++++- src/util/virstring.h | 5 ++++ tests/virstringtest.c | 59 ++++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 108 insertions(+), 1 deletion(-) diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms index f26190d..f7379a2 100644 --- a/src/libvirt_private.syms +++ b/src/libvirt_private.syms @@ -1794,6 +1794,7 @@ virStringArrayHasString; virStringFreeList; virStringJoin; virStringListLength; +virStringReplace; virStringSearch; virStringSortCompare; virStringSortRevCompare; diff --git a/src/util/virstring.c b/src/util/virstring.c index 67a87d3..3e42b06 100644 --- a/src/util/virstring.c +++ b/src/util/virstring.c @@ -619,7 +619,6 @@ size_t virStringListLength(char **strings) return i; } - /** * virStringSortCompare: * @@ -747,3 +746,46 @@ cleanup: } return ret; } + +/** + * virStringReplace: + * @haystack: the source string to process + * @oldneedle: the substring to locate + * @newneedle: the substring to insert + * + * Search @haystack and replace all occurences of @oldneedle with @newneedle. + * + * Returns: a new string with all the replacements, or NULL on error + */ +char * +virStringReplace(const char *haystack, + const char *oldneedle, + const char *newneedle) +{ + virBuffer buf = VIR_BUFFER_INITIALIZER; + const char *tmp1, *tmp2; + size_t oldneedlelen = strlen(oldneedle); + size_t newneedlelen = strlen(newneedle); + + tmp1 = haystack; + tmp2 = NULL; + + while (tmp1) { + tmp2 = strstr(tmp1, oldneedle); + + if (tmp2) { + virBufferAdd(&buf, tmp1, (tmp2 - tmp1)); + virBufferAdd(&buf, newneedle, newneedlelen); + tmp2 += oldneedlelen; + } else { + virBufferAdd(&buf, tmp1, -1); + } + + tmp1 = tmp2; + } + + if (virBufferError(&buf)) + return NULL; + + return virBufferContentAndReset(&buf); +} diff --git a/src/util/virstring.h b/src/util/virstring.h index 8efc80c..5b77581 100644 --- a/src/util/virstring.h +++ b/src/util/virstring.h @@ -232,5 +232,10 @@ ssize_t virStringSearch(const char *str, char ***matches) ATTRIBUTE_NONNULL(1) ATTRIBUTE_NONNULL(2) ATTRIBUTE_NONNULL(4); +char *virStringReplace(const char *haystack, + const char *oldneedle, + const char *newneedle) + ATTRIBUTE_NONNULL(1) ATTRIBUTE_NONNULL(2) ATTRIBUTE_NONNULL(3); + #endif /* __VIR_STRING_H__ */ diff --git a/tests/virstringtest.c b/tests/virstringtest.c index b8c6115..43023d5 100644 --- a/tests/virstringtest.c +++ b/tests/virstringtest.c @@ -338,6 +338,38 @@ testStringSearch(const void *opaque ATTRIBUTE_UNUSED) return ret; } + +struct stringReplaceData { + const char *haystack; + const char *oldneedle; + const char *newneedle; + const char *result; +}; + +static int +testStringReplace(const void *opaque ATTRIBUTE_UNUSED) +{ + const struct stringReplaceData *data = opaque; + char *result; + int ret = -1; + + result = virStringReplace(data->haystack, + data->oldneedle, + data->newneedle); + + if (STRNEQ_NULLABLE(data->result, result)) { + fprintf(stderr, "Expected '%s' but got '%s'\n", + data->result, NULLSTR(result)); + goto cleanup; + } + + ret = 0; + + cleanup: + return ret; +} + + static int mymain(void) { @@ -428,6 +460,33 @@ mymain(void) const char *matches3[] = { "foo", "bar" }; TEST_SEARCH("1foo2bar3eek", "([a-z]+)", 2, 2, matches3, false); +#define TEST_REPLACE(h, o, n, r) \ + do { \ + struct stringReplaceData data = { \ + .haystack = h, \ + .oldneedle = o, \ + .newneedle = n, \ + .result = r \ + }; \ + if (virtTestRun("virStringReplace " h, testStringReplace, &data) < 0) \ + ret = -1; \ + } while (0) + + /* no matches */ + TEST_REPLACE("foo", "bar", "eek", "foo"); + + /* complete match */ + TEST_REPLACE("foo", "foo", "bar", "bar"); + + /* middle match */ + TEST_REPLACE("foobarwizz", "bar", "eek", "fooeekwizz"); + + /* many matches */ + TEST_REPLACE("foofoofoofoo", "foo", "bar", "barbarbarbar"); + + /* many matches */ + TEST_REPLACE("fooooofoooo", "foo", "bar", "barooobaroo"); + return ret==0 ? EXIT_SUCCESS : EXIT_FAILURE; } -- 1.8.5.3

On 02/19/2014 09:36 PM, Daniel P. Berrange wrote:
Add a virStringReplace method to virstring.{h,c} to perform substring matching and replacement
Signed-off-by: Daniel P. Berrange <berrange@redhat.com> --- src/libvirt_private.syms | 1 + src/util/virstring.c | 44 +++++++++++++++++++++++++++++++++++- src/util/virstring.h | 5 ++++ tests/virstringtest.c | 59 ++++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 108 insertions(+), 1 deletion(-)
diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms index f26190d..f7379a2 100644 --- a/src/libvirt_private.syms +++ b/src/libvirt_private.syms @@ -1794,6 +1794,7 @@ virStringArrayHasString; virStringFreeList; virStringJoin; virStringListLength; +virStringReplace; virStringSearch; virStringSortCompare; virStringSortRevCompare; diff --git a/src/util/virstring.c b/src/util/virstring.c index 67a87d3..3e42b06 100644 --- a/src/util/virstring.c +++ b/src/util/virstring.c @@ -619,7 +619,6 @@ size_t virStringListLength(char **strings) return i; }
- /** * virStringSortCompare: *
Unrelated whitespace change.
@@ -747,3 +746,46 @@ cleanup: } return ret; } + +/** + * virStringReplace: + * @haystack: the source string to process + * @oldneedle: the substring to locate + * @newneedle: the substring to insert + * + * Search @haystack and replace all occurences of @oldneedle with @newneedle. + * + * Returns: a new string with all the replacements, or NULL on error + */ +char * +virStringReplace(const char *haystack, + const char *oldneedle, + const char *newneedle) +{ + virBuffer buf = VIR_BUFFER_INITIALIZER; + const char *tmp1, *tmp2; + size_t oldneedlelen = strlen(oldneedle); + size_t newneedlelen = strlen(newneedle); + + tmp1 = haystack; + tmp2 = NULL; + + while (tmp1) { + tmp2 = strstr(tmp1, oldneedle); + + if (tmp2) { + virBufferAdd(&buf, tmp1, (tmp2 - tmp1)); + virBufferAdd(&buf, newneedle, newneedlelen); + tmp2 += oldneedlelen; + } else { + virBufferAdd(&buf, tmp1, -1); + } + + tmp1 = tmp2; + } + + if (virBufferError(&buf))
virBufferError doesn't report an error, but I think this function should.
+ return NULL; + + return virBufferContentAndReset(&buf); +}
diff --git a/tests/virstringtest.c b/tests/virstringtest.c index b8c6115..43023d5 100644 --- a/tests/virstringtest.c +++ b/tests/virstringtest.c @@ -428,6 +460,33 @@ mymain(void) const char *matches3[] = { "foo", "bar" }; TEST_SEARCH("1foo2bar3eek", "([a-z]+)", 2, 2, matches3, false);
+#define TEST_REPLACE(h, o, n, r) \ + do { \ + struct stringReplaceData data = { \ + .haystack = h, \ + .oldneedle = o, \ + .newneedle = n, \ + .result = r \ + }; \ + if (virtTestRun("virStringReplace " h, testStringReplace, &data) < 0) \ + ret = -1; \ + } while (0) + + /* no matches */ + TEST_REPLACE("foo", "bar", "eek", "foo"); + + /* complete match */ + TEST_REPLACE("foo", "foo", "bar", "bar"); + + /* middle match */ + TEST_REPLACE("foobarwizz", "bar", "eek", "fooeekwizz"); + + /* many matches */ + TEST_REPLACE("foofoofoofoo", "foo", "bar", "barbarbarbar"); + + /* many matches */ + TEST_REPLACE("fooooofoooo", "foo", "bar", "barooobaroo"); + return ret==0 ? EXIT_SUCCESS : EXIT_FAILURE; }
A test with different length of old and new needles would be nice. ACK with error reporting. Jan
participants (2)
-
Daniel P. Berrange
-
Ján Tomko