
On Mon, Jun 19, 2023 at 02:03:48PM +0200, Michal Privoznik wrote:
It's weird that in 2023 there's no reliable and portable way to parse strings, but okay.
We started with strtol(). Except, it doesn't work the same on Linux and Windows. On Windows it behaves a bit different when it comes to parsing strings with base = 0. So we've switched to g_ascii_strtoll() which promised to solve this. And it did. Except, it introduced another problem. I don't really understand why, but I've seen random test failures and all of them claimed inability to parse a number (specifically <memory/> from the hard coded domain XML from test driver, which IMO is just a coincidence because it's the first number we parse).
What platforms are you seeing the failures on ? All platforms or just on Windows, or just some other specific one?
The nature of the problem suggests there's a race condition somewhere. I suspected glib but I don't know their code well to prove it.
The code in question is: guint64 g_ascii_strtoull (const gchar *nptr, gchar **endptr, guint base) { #if defined(USE_XLOCALE) && defined(HAVE_STRTOULL_L) return strtoull_l (nptr, endptr, base, get_C_locale ()); #else gboolean negative; guint64 result; result = g_parse_long_long (nptr, (const gchar **) endptr, base, &negative); /* Return the result of the appropriate sign. */ return negative ? -result : result; #endif } gint64 g_ascii_strtoll (const gchar *nptr, gchar **endptr, guint base) { #if defined(USE_XLOCALE) && defined(HAVE_STRTOLL_L) return strtoll_l (nptr, endptr, base, get_C_locale ()); #else gboolean negative; guint64 result; result = g_parse_long_long (nptr, (const gchar **) endptr, base, &negative); if (negative && result > (guint64) G_MININT64) { errno = ERANGE; return G_MININT64; } else if (!negative && result > (guint64) G_MAXINT64) { errno = ERANGE; return G_MAXINT64; } else if (negative) return - (gint64) result; else return (gint64) result; #endif } On Linux, we should take the first branch on the #ifdef, which is calling strtoll_l just as your patch is switching us to. I guess Windows would use the g_parse_long_long fallback. I don't see anything in that code which could be non-thread safe. It is just a copy of strtol cdoe from glibc. On the Linux case get_C_locale is static locale_t get_C_locale (void) { static gsize initialized = FALSE; static locale_t C_locale = NULL; if (g_once_init_enter (&initialized)) { C_locale = newlocale (LC_ALL_MASK, "C", NULL); g_once_init_leave (&initialized, TRUE); } return C_locale; } and only way that could fail is if newlocale isn't threadsafe, and we have other code in libvirt calling newlocale at exact same time as the *first* call to get_C_locale.
Anyway, using strtoll_l() directly and taking glib out of the picture solves the issue.
I'm pretty surprised if it fixes anything ! I'm a little concerned we might have just masked a bug that's still hiding elsewhere.
Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- meson.build | 1 + src/util/virstring.c | 82 +++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 79 insertions(+), 4 deletions(-)
diff --git a/meson.build b/meson.build index aa391e7178..cd713795f5 100644 --- a/meson.build +++ b/meson.build @@ -596,6 +596,7 @@ functions = [ 'sched_setscheduler', 'setgroups', 'setrlimit', + 'strtoll_l', 'symlink', 'sysctlbyname', ] diff --git a/src/util/virstring.c b/src/util/virstring.c index 1a13570d30..076e885624 100644 --- a/src/util/virstring.c +++ b/src/util/virstring.c @@ -127,8 +127,17 @@ virStrToLong_i(char const *s, char **end_ptr, int base, int *result) char *p; int err;
+#if defined(WITH_STRTOLL_L) && defined(WITH_NEWLOCALE) + if (virLocaleInitialize() < 0) + return -1; +#endif + errno = 0; +#if defined(WITH_STRTOLL_L) && defined(WITH_NEWLOCALE) + val = strtoll_l(s, &p, base, virLocaleRaw); +#else val = g_ascii_strtoll(s, &p, base); +#endif err = (errno || (!end_ptr && *p) || p == s || (int) val != val); if (end_ptr) *end_ptr = p; @@ -148,13 +157,23 @@ virStrToLong_ui(char const *s, char **end_ptr, int base, unsigned int *result) char *p; bool err = false;
+#if defined(WITH_STRTOLL_L) && defined(WITH_NEWLOCALE) + if (virLocaleInitialize() < 0) + return -1; +#endif + errno = 0; +#if defined(WITH_STRTOLL_L) && defined(WITH_NEWLOCALE) + val = strtoull_l(s, &p, base, virLocaleRaw); +#else val = g_ascii_strtoull(s, &p, base); +#endif
/* This one's tricky. We _want_ to allow "-1" as shorthand for * UINT_MAX regardless of whether long is 32-bit or 64-bit. But - * g_ascii_strtoull treats "-1" as ULLONG_MAX, and going from ullong back - * to uint differs depending on the size of uint. */ + * strtoull_l/g_ascii_strtoull treats "-1" as ULLONG_MAX, and + * going from ullong back to uint differs depending on the size of + * uint. */ if (memchr(s, '-', p - s)) { if (-val > UINT_MAX) err = true; @@ -180,8 +199,17 @@ virStrToLong_uip(char const *s, char **end_ptr, int base, unsigned int *result) char *p; bool err = false;
+#if defined(WITH_STRTOLL_L) && defined(WITH_NEWLOCALE) + if (virLocaleInitialize() < 0) + return -1; +#endif + errno = 0; +#if defined(WITH_STRTOLL_L) && defined(WITH_NEWLOCALE) + val = strtoull_l(s, &p, base, virLocaleRaw); +#else val = g_ascii_strtoull(s, &p, base); +#endif err = (memchr(s, '-', p - s) || errno || (!end_ptr && *p) || p == s || (unsigned int) val != val); if (end_ptr) @@ -204,13 +232,23 @@ virStrToLong_ul(char const *s, char **end_ptr, int base, unsigned long *result) char *p; bool err = false;
+#if defined(WITH_STRTOLL_L) && defined(WITH_NEWLOCALE) + if (virLocaleInitialize() < 0) + return -1; +#endif + errno = 0; +#if defined(WITH_STRTOLL_L) && defined(WITH_NEWLOCALE) + val = strtoull_l(s, &p, base, virLocaleRaw); +#else val = g_ascii_strtoull(s, &p, base); +#endif
/* This one's tricky. We _want_ to allow "-1" as shorthand for * ULONG_MAX regardless of whether long is 32-bit or 64-bit. But - * g_ascii_strtoull treats "-1" as ULLONG_MAX, and going from ullong back - * to ulong differs depending on the size of ulong. */ + * strtoull_l/g_ascii_strtoull treats "-1" as ULLONG_MAX, and + * going from ullong back to ulong differs depending on the size + * of ulong. */ if (memchr(s, '-', p - s)) { if (-val > ULONG_MAX) err = true; @@ -237,8 +275,17 @@ virStrToLong_ulp(char const *s, char **end_ptr, int base, char *p; int err;
+#if defined(WITH_STRTOLL_L) && defined(WITH_NEWLOCALE) + if (virLocaleInitialize() < 0) + return -1; +#endif + errno = 0; +#if defined(WITH_STRTOLL_L) && defined(WITH_NEWLOCALE) + val = strtoull_l(s, &p, base, virLocaleRaw); +#else val = g_ascii_strtoull(s, &p, base); +#endif err = (memchr(s, '-', p - s) || errno || (!end_ptr && *p) || p == s || (unsigned long) val != val); if (end_ptr) @@ -257,8 +304,17 @@ virStrToLong_ll(char const *s, char **end_ptr, int base, long long *result) char *p; int err;
+#if defined(WITH_STRTOLL_L) && defined(WITH_NEWLOCALE) + if (virLocaleInitialize() < 0) + return -1; +#endif + errno = 0; +#if defined(WITH_STRTOLL_L) && defined(WITH_NEWLOCALE) + val = strtoll_l(s, &p, base, virLocaleRaw); +#else val = g_ascii_strtoll(s, &p, base); +#endif err = (errno || (!end_ptr && *p) || p == s); if (end_ptr) *end_ptr = p; @@ -279,8 +335,17 @@ virStrToLong_ull(char const *s, char **end_ptr, int base, char *p; int err;
+#if defined(WITH_STRTOLL_L) && defined(WITH_NEWLOCALE) + if (virLocaleInitialize() < 0) + return -1; +#endif + errno = 0; +#if defined(WITH_STRTOLL_L) && defined(WITH_NEWLOCALE) + val = strtoull_l(s, &p, base, virLocaleRaw); +#else val = g_ascii_strtoull(s, &p, base); +#endif err = (errno || (!end_ptr && *p) || p == s); if (end_ptr) *end_ptr = p; @@ -300,8 +365,17 @@ virStrToLong_ullp(char const *s, char **end_ptr, int base, char *p; int err;
+#if defined(WITH_STRTOLL_L) && defined(WITH_NEWLOCALE) + if (virLocaleInitialize() < 0) + return -1; +#endif + errno = 0; +#if defined(WITH_STRTOLL_L) && defined(WITH_NEWLOCALE) + val = strtoull_l(s, &p, base, virLocaleRaw); +#else val = g_ascii_strtoull(s, &p, base); +#endif err = (memchr(s, '-', p - s) || errno || (!end_ptr && *p) || p == s); if (end_ptr) -- 2.39.3
With regards, Daniel -- |: https://berrange.com -o- https://www.flickr.com/photos/dberrange :| |: https://libvirt.org -o- https://fstop138.berrange.com :| |: https://entangle-photo.org -o- https://www.instagram.com/dberrange :|