[libvirt] [PATCH 0/8] Make debugging of "cannot acquire state change lock" easier

https://bugzilla.redhat.com/show_bug.cgi?id=853839 Jiri Denemark (8): POTFILES.in: Sort Add support for tracking thread jobs Force usage of virThreadCreate virThread: Set thread job virThreadPool: Set thread worker name Set thread job for every RPC call qemu: Track the API which started the current job qemu: Add timing to domain jobs cfg.mk | 9 +++ daemon/remote.c | 1 + include/libvirt/virterror.h | 1 + po/POTFILES.in | 5 +- src/Makefile.am | 2 + src/libvirt_private.syms | 11 +++- src/locking/lock_daemon_dispatch.c | 1 + src/nwfilter/nwfilter_learnipaddr.c | 15 ++--- src/nwfilter/nwfilter_learnipaddr.h | 1 - src/qemu/qemu_domain.c | 60 ++++++++++++++--- src/qemu/qemu_domain.h | 4 ++ src/rpc/gendispatch.pl | 6 +- src/util/virerror.c | 1 + src/util/virthread.c | 25 +++++-- src/util/virthread.h | 13 ++-- src/util/virthreadjob.c | 126 ++++++++++++++++++++++++++++++++++++ src/util/virthreadjob.h | 33 ++++++++++ src/util/virthreadpool.c | 44 ++++++++----- src/util/virthreadpool.h | 14 ++-- 19 files changed, 317 insertions(+), 55 deletions(-) create mode 100644 src/util/virthreadjob.c create mode 100644 src/util/virthreadjob.h -- 2.3.3

Signed-off-by: Jiri Denemark <jdenemar@redhat.com> --- po/POTFILES.in | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/po/POTFILES.in b/po/POTFILES.in index fd8bf33..ab6ae3b 100644 --- a/po/POTFILES.in +++ b/po/POTFILES.in @@ -178,6 +178,8 @@ src/util/virconf.c src/util/vircrypto.c src/util/virdbus.c src/util/virdnsmasq.c +src/util/virerror.c +src/util/virerror.h src/util/vireventpoll.c src/util/virfile.c src/util/virfirewall.c @@ -218,8 +220,6 @@ src/util/virstorageencryption.c src/util/virstoragefile.c src/util/virstring.c src/util/virsysinfo.c -src/util/virerror.c -src/util/virerror.h src/util/virtime.c src/util/virtpm.c src/util/virtypedparam.c -- 2.3.3

Each thread can use a thread local variable to keep the name of a job which is currently running in the job. The virThreadJobSetWorker API is supposed to be called once by any thread which is used as a worker, i.e., it is waiting in a pool, woken up to do a job, and returned back to the pool. The virThreadJobSet/virThreadJobClear APIs are to be called at the beginning/end of each job. Signed-off-by: Jiri Denemark <jdenemar@redhat.com> --- include/libvirt/virterror.h | 1 + po/POTFILES.in | 1 + src/Makefile.am | 2 + src/libvirt_private.syms | 7 +++ src/util/virerror.c | 1 + src/util/virthreadjob.c | 126 ++++++++++++++++++++++++++++++++++++++++++++ src/util/virthreadjob.h | 33 ++++++++++++ 7 files changed, 171 insertions(+) create mode 100644 src/util/virthreadjob.c create mode 100644 src/util/virthreadjob.h diff --git a/include/libvirt/virterror.h b/include/libvirt/virterror.h index 5dc99dc..9c5b069 100644 --- a/include/libvirt/virterror.h +++ b/include/libvirt/virterror.h @@ -125,6 +125,7 @@ typedef enum { VIR_FROM_FIREWALL = 59, /* Error from firewall */ VIR_FROM_POLKIT = 60, /* Error from polkit code */ + VIR_FROM_THREAD = 61, /* Error from thread utils */ # ifdef VIR_ENUM_SENTINELS VIR_ERR_DOMAIN_LAST diff --git a/po/POTFILES.in b/po/POTFILES.in index ab6ae3b..dd06ab3 100644 --- a/po/POTFILES.in +++ b/po/POTFILES.in @@ -220,6 +220,7 @@ src/util/virstorageencryption.c src/util/virstoragefile.c src/util/virstring.c src/util/virsysinfo.c +src/util/virthreadjob.c src/util/virtime.c src/util/virtpm.c src/util/virtypedparam.c diff --git a/src/Makefile.am b/src/Makefile.am index 00b47e7..ec62ab4 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -160,6 +160,7 @@ UTIL_SOURCES = \ util/virsysinfo.c util/virsysinfo.h \ util/virsystemd.c util/virsystemd.h \ util/virthread.c util/virthread.h \ + util/virthreadjob.c utils/virthreadjob.h \ util/virthreadpool.c util/virthreadpool.h \ util/virtime.h util/virtime.c \ util/virtpm.h util/virtpm.c \ @@ -2147,6 +2148,7 @@ libvirt_setuid_rpc_client_la_SOURCES = \ util/virstring.c \ util/virtime.c \ util/virthread.c \ + util/virthreadjob.c \ util/virtypedparam.c \ util/viruri.c \ util/virutil.c \ diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms index 8141eba..f5756a7 100644 --- a/src/libvirt_private.syms +++ b/src/libvirt_private.syms @@ -2183,6 +2183,13 @@ virThreadSelf; virThreadSelfID; +# util/virthreadjob.h +virThreadJobClear; +virThreadJobGet; +virThreadJobSet; +virThreadJobSetWorker; + + # util/virthreadpool.h virThreadPoolFree; virThreadPoolGetMaxWorkers; diff --git a/src/util/virerror.c b/src/util/virerror.c index 9635c82..c4e84e7 100644 --- a/src/util/virerror.c +++ b/src/util/virerror.c @@ -132,6 +132,7 @@ VIR_ENUM_IMPL(virErrorDomain, VIR_ERR_DOMAIN_LAST, "Firewall", "Polkit", /* 60 */ + "Thread jobs", ) diff --git a/src/util/virthreadjob.c b/src/util/virthreadjob.c new file mode 100644 index 0000000..2f483f9 --- /dev/null +++ b/src/util/virthreadjob.c @@ -0,0 +1,126 @@ +/* + * virthreadjob.c: code for tracking job associated with current thread + * + * Copyright (C) 2013-2015 Red Hat, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library. If not, see + * <http://www.gnu.org/licenses/>. + * + * Author: Jiri Denemark <jdenemar@redhat.com> + */ + +#include <config.h> + +#include "internal.h" +#include "virerror.h" +#include "virlog.h" +#include "virthread.h" +#include "virthreadjob.h" + +#define VIR_FROM_THIS VIR_FROM_THREAD +VIR_LOG_INIT("util.threadjob"); + +virThreadLocal virThreadJobWorker; +virThreadLocal virThreadJobName; + + +static int +virThreadJobOnceInit(void) +{ + if (virThreadLocalInit(&virThreadJobWorker, NULL) < 0 || + virThreadLocalInit(&virThreadJobName, NULL) < 0) + return -1; + return 0; +} + +VIR_ONCE_GLOBAL_INIT(virThreadJob) + + +const char * +virThreadJobGet(void) +{ + const char *job; + + if (virThreadJobInitialize() < 0) + return NULL; + + job = virThreadLocalGet(&virThreadJobName); + if (!job) + job = virThreadLocalGet(&virThreadJobWorker); + + return job; +} + + +void +virThreadJobSetWorker(const char *worker) +{ + if (!worker || virThreadJobInitialize() < 0) + return; + + if (virThreadLocalSet(&virThreadJobWorker, (void *) worker) < 0) + virReportSystemError(errno, + _("cannot set worker name to %s"), + worker); + + VIR_DEBUG("Thread %llu is running worker %s", virThreadSelfID(), worker); +} + + +void +virThreadJobSet(const char *caller) +{ + const char *worker; + + if (!caller || virThreadJobInitialize() < 0) + return; + + if (virThreadLocalSet(&virThreadJobName, (void *) caller) < 0) + virReportSystemError(errno, + _("cannot set current job to %s"), + caller); + + if ((worker = virThreadLocalGet(&virThreadJobWorker))) { + VIR_DEBUG("Thread %llu (%s) is now running job %s", + virThreadSelfID(), worker, caller); + } else { + VIR_DEBUG("Thread %llu is now running job %s", + virThreadSelfID(), caller); + } +} + + +void +virThreadJobClear(int rv) +{ + const char *old; + const char *worker; + + if (virThreadJobInitialize() < 0) + return; + + if (!(old = virThreadLocalGet(&virThreadJobName))) + return; + + if (virThreadLocalSet(&virThreadJobName, NULL) < 0) + virReportSystemError(errno, "%s", _("cannot reset current job")); + + if ((worker = virThreadLocalGet(&virThreadJobWorker))) { + VIR_DEBUG("Thread %llu (%s) finished job %s with ret=%d", + virThreadSelfID(), worker, old, rv); + } else { + VIR_DEBUG("Thread %llu finished job %s with ret=%d", + virThreadSelfID(), old, rv); + } +} diff --git a/src/util/virthreadjob.h b/src/util/virthreadjob.h new file mode 100644 index 0000000..659f91d --- /dev/null +++ b/src/util/virthreadjob.h @@ -0,0 +1,33 @@ +/* + * virthreadjob.h: APIs for tracking job associated with current thread + * + * Copyright (C) 2013-2015 Red Hat, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library. If not, see + * <http://www.gnu.org/licenses/>. + * + * Author: Jiri Denemark <jdenemar@redhat.com> + */ + +#ifndef __VIR_THREAD_JOB_H__ +# define __VIR_THREAD_JOB_H__ + + +const char *virThreadJobGet(void); + +void virThreadJobSetWorker(const char *caller); +void virThreadJobSet(const char *caller); +void virThreadJobClear(int rv); + +#endif -- 2.3.3

We want all threads to be set as workers or to have a job assigned to them, which can easily be achieved in virThreadCreate wrapper to pthread_create. Let's make sure we always use the wrapper. Signed-off-by: Jiri Denemark <jdenemar@redhat.com> --- cfg.mk | 9 +++++++++ src/nwfilter/nwfilter_learnipaddr.c | 15 ++++++--------- src/nwfilter/nwfilter_learnipaddr.h | 1 - 3 files changed, 15 insertions(+), 10 deletions(-) diff --git a/cfg.mk b/cfg.mk index 6885f9e..661cccc 100644 --- a/cfg.mk +++ b/cfg.mk @@ -999,6 +999,12 @@ sc_prohibit_sysconf_pagesize: halt='use virGetSystemPageSize[KB] instead of sysconf(_SC_PAGESIZE)' \ $(_sc_search_regexp) +sc_prohibit_pthread_create: + @prohibit='\bpthread_create\b' \ + exclude='sc_prohibit_pthread_create' \ + halt="avoid using 'pthread_create', use 'virThreadCreate' instead" \ + $(_sc_search_regexp) + # We don't use this feature of maint.mk. prev_version_file = /dev/null @@ -1192,3 +1198,6 @@ exclude_file_name_regexp--sc_prohibit_virXXXFree = \ exclude_file_name_regexp--sc_prohibit_sysconf_pagesize = \ ^(cfg\.mk|src/util/virutil\.c)$$ + +exclude_file_name_regexp--sc_prohibit_pthread_create = \ + ^(cfg\.mk|src/util/virthread\.c|tests/.*)$$ diff --git a/src/nwfilter/nwfilter_learnipaddr.c b/src/nwfilter/nwfilter_learnipaddr.c index 1b875c3..5b55055 100644 --- a/src/nwfilter/nwfilter_learnipaddr.c +++ b/src/nwfilter/nwfilter_learnipaddr.c @@ -374,7 +374,7 @@ procDHCPOpts(struct dhcp *dhcp, int dhcp_opts_len, * will require that the IP address was taken from an ARP packet or an IPv4 * packet. Both flags can be set at the same time. */ -static void * +static void learnIPAddressThread(void *arg) { char errbuf[PCAP_ERRBUF_SIZE] = {0}; @@ -638,8 +638,6 @@ learnIPAddressThread(void *arg) techdriver->applyDropAllRules(req->ifname); } - memset(&req->thread, 0x0, sizeof(req->thread)); - VIR_DEBUG("pcap thread terminating for interface %s\n", req->ifname); virNWFilterUnlockIface(req->ifname); @@ -648,8 +646,6 @@ learnIPAddressThread(void *arg) virNWFilterDeregisterLearnReq(req->ifindex); virNWFilterIPAddrLearnReqFree(req); - - return 0; } @@ -686,6 +682,7 @@ virNWFilterLearnIPAddress(virNWFilterTechDriverPtr techdriver, enum howDetect howDetect) { int rc; + virThread thread; virNWFilterIPAddrLearnReqPtr req = NULL; virNWFilterHashTablePtr ht = NULL; @@ -742,10 +739,10 @@ virNWFilterLearnIPAddress(virNWFilterTechDriverPtr techdriver, if (rc < 0) goto err_free_req; - if (pthread_create(&req->thread, - NULL, - learnIPAddressThread, - req) != 0) + if (virThreadCreate(&thread, + false, + learnIPAddressThread, + req) != 0) goto err_dereg_req; return 0; diff --git a/src/nwfilter/nwfilter_learnipaddr.h b/src/nwfilter/nwfilter_learnipaddr.h index 1cc881a..b93ed38 100644 --- a/src/nwfilter/nwfilter_learnipaddr.h +++ b/src/nwfilter/nwfilter_learnipaddr.h @@ -49,7 +49,6 @@ struct _virNWFilterIPAddrLearnReq { enum howDetect howDetect; int status; - pthread_t thread; volatile bool terminate; }; -- 2.3.3

Automatically assign a job to every thread created by virThreadCreate. The name of the virThreadFunc function passed to virThreadCreate is used as the job or worker name in case no name is explicitly passed. Signed-off-by: Jiri Denemark <jdenemar@redhat.com> --- src/libvirt_private.syms | 2 +- src/util/virthread.c | 25 +++++++++++++++++++++---- src/util/virthread.h | 13 +++++++++---- 3 files changed, 31 insertions(+), 9 deletions(-) diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms index f5756a7..4980305 100644 --- a/src/libvirt_private.syms +++ b/src/libvirt_private.syms @@ -2174,7 +2174,7 @@ virRWLockRead; virRWLockUnlock; virRWLockWrite; virThreadCancel; -virThreadCreate; +virThreadCreateFull; virThreadID; virThreadInitialize; virThreadIsSelf; diff --git a/src/util/virthread.c b/src/util/virthread.c index 7e841d1..c2a9e7f 100644 --- a/src/util/virthread.c +++ b/src/util/virthread.c @@ -30,6 +30,7 @@ #endif #include "viralloc.h" +#include "virthreadjob.h" /* Nothing special required for pthreads */ @@ -184,6 +185,8 @@ void virCondBroadcast(virCondPtr c) struct virThreadArgs { virThreadFunc func; + const char *funcName; + bool worker; void *opaque; }; @@ -194,14 +197,26 @@ static void *virThreadHelper(void *data) /* Free args early, rather than tying it up during the entire thread. */ VIR_FREE(args); + + if (local.worker) + virThreadJobSetWorker(local.funcName); + else + virThreadJobSet(local.funcName); + local.func(local.opaque); + + if (!local.worker) + virThreadJobClear(0); + return NULL; } -int virThreadCreate(virThreadPtr thread, - bool joinable, - virThreadFunc func, - void *opaque) +int virThreadCreateFull(virThreadPtr thread, + bool joinable, + virThreadFunc func, + const char *funcName, + bool worker, + void *opaque) { struct virThreadArgs *args; pthread_attr_t attr; @@ -216,6 +231,8 @@ int virThreadCreate(virThreadPtr thread, } args->func = func; + args->funcName = funcName; + args->worker = worker; args->opaque = opaque; if (!joinable) diff --git a/src/util/virthread.h b/src/util/virthread.h index 7146f0f..e466d9b 100644 --- a/src/util/virthread.h +++ b/src/util/virthread.h @@ -88,10 +88,15 @@ void virThreadOnExit(void); typedef void (*virThreadFunc)(void *opaque); -int virThreadCreate(virThreadPtr thread, - bool joinable, - virThreadFunc func, - void *opaque) ATTRIBUTE_RETURN_CHECK; +# define virThreadCreate(thread, joinable, func, opaque) \ + virThreadCreateFull(thread, joinable, func, #func, false, opaque) + +int virThreadCreateFull(virThreadPtr thread, + bool joinable, + virThreadFunc func, + const char *funcName, + bool worker, + void *opaque) ATTRIBUTE_RETURN_CHECK; void virThreadSelf(virThreadPtr thread); bool virThreadIsSelf(virThreadPtr thread); void virThreadJoin(virThreadPtr thread); -- 2.3.3

Every thread created as a worker thread within a pool gets a name according to virThreadPoolJobFunc name. Signed-off-by: Jiri Denemark <jdenemar@redhat.com> --- src/libvirt_private.syms | 2 +- src/util/virthreadpool.c | 44 +++++++++++++++++++++++++++----------------- src/util/virthreadpool.h | 14 +++++++++----- 3 files changed, 37 insertions(+), 23 deletions(-) diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms index 4980305..90bf08b 100644 --- a/src/libvirt_private.syms +++ b/src/libvirt_private.syms @@ -2195,7 +2195,7 @@ virThreadPoolFree; virThreadPoolGetMaxWorkers; virThreadPoolGetMinWorkers; virThreadPoolGetPriorityWorkers; -virThreadPoolNew; +virThreadPoolNewFull; virThreadPoolSendJob; diff --git a/src/util/virthreadpool.c b/src/util/virthreadpool.c index 98d2124..ed22486 100644 --- a/src/util/virthreadpool.c +++ b/src/util/virthreadpool.c @@ -58,6 +58,7 @@ struct _virThreadPool { bool quit; virThreadPoolJobFunc jobFunc; + const char *jobFuncName; void *jobOpaque; virThreadPoolJobList jobList; size_t jobQueueDepth; @@ -156,11 +157,13 @@ static void virThreadPoolWorker(void *opaque) virMutexUnlock(&pool->mutex); } -virThreadPoolPtr virThreadPoolNew(size_t minWorkers, - size_t maxWorkers, - size_t prioWorkers, - virThreadPoolJobFunc func, - void *opaque) +virThreadPoolPtr +virThreadPoolNewFull(size_t minWorkers, + size_t maxWorkers, + size_t prioWorkers, + virThreadPoolJobFunc func, + const char *funcName, + void *opaque) { virThreadPoolPtr pool; size_t i; @@ -175,6 +178,7 @@ virThreadPoolPtr virThreadPoolNew(size_t minWorkers, pool->jobList.tail = pool->jobList.head = NULL; pool->jobFunc = func; + pool->jobFuncName = funcName; pool->jobOpaque = opaque; if (virMutexInit(&pool->mutex) < 0) @@ -196,10 +200,12 @@ virThreadPoolPtr virThreadPoolNew(size_t minWorkers, data->pool = pool; data->cond = &pool->cond; - if (virThreadCreate(&pool->workers[i], - true, - virThreadPoolWorker, - data) < 0) { + if (virThreadCreateFull(&pool->workers[i], + true, + virThreadPoolWorker, + pool->jobFuncName, + true, + data) < 0) { goto error; } pool->nWorkers++; @@ -218,10 +224,12 @@ virThreadPoolPtr virThreadPoolNew(size_t minWorkers, data->cond = &pool->prioCond; data->priority = true; - if (virThreadCreate(&pool->prioWorkers[i], - true, - virThreadPoolWorker, - data) < 0) { + if (virThreadCreateFull(&pool->prioWorkers[i], + true, + virThreadPoolWorker, + pool->jobFuncName, + true, + data) < 0) { goto error; } pool->nPrioWorkers++; @@ -329,10 +337,12 @@ int virThreadPoolSendJob(virThreadPoolPtr pool, data->pool = pool; data->cond = &pool->cond; - if (virThreadCreate(&pool->workers[pool->nWorkers - 1], - true, - virThreadPoolWorker, - data) < 0) { + if (virThreadCreateFull(&pool->workers[pool->nWorkers - 1], + true, + virThreadPoolWorker, + pool->jobFuncName, + true, + data) < 0) { VIR_FREE(data); pool->nWorkers--; goto error; diff --git a/src/util/virthreadpool.h b/src/util/virthreadpool.h index cbac600..538b62f 100644 --- a/src/util/virthreadpool.h +++ b/src/util/virthreadpool.h @@ -33,11 +33,15 @@ typedef virThreadPool *virThreadPoolPtr; typedef void (*virThreadPoolJobFunc)(void *jobdata, void *opaque); -virThreadPoolPtr virThreadPoolNew(size_t minWorkers, - size_t maxWorkers, - size_t prioWorkers, - virThreadPoolJobFunc func, - void *opaque) ATTRIBUTE_NONNULL(4); +# define virThreadPoolNew(min, max, prio, func, opaque) \ + virThreadPoolNewFull(min, max, prio, func, #func, opaque) + +virThreadPoolPtr virThreadPoolNewFull(size_t minWorkers, + size_t maxWorkers, + size_t prioWorkers, + virThreadPoolJobFunc func, + const char *funcName, + void *opaque) ATTRIBUTE_NONNULL(4); size_t virThreadPoolGetMinWorkers(virThreadPoolPtr pool); size_t virThreadPoolGetMaxWorkers(virThreadPoolPtr pool); -- 2.3.3

Since all APIs are also RPC calls, we automatically get all APIs covered with thread jobs. Signed-off-by: Jiri Denemark <jdenemar@redhat.com> --- daemon/remote.c | 1 + src/locking/lock_daemon_dispatch.c | 1 + src/qemu/qemu_domain.c | 1 + src/rpc/gendispatch.pl | 6 +++++- 4 files changed, 8 insertions(+), 1 deletion(-) diff --git a/daemon/remote.c b/daemon/remote.c index 62a4728..ea7ae94 100644 --- a/daemon/remote.c +++ b/daemon/remote.c @@ -51,6 +51,7 @@ #include "viraccessapicheck.h" #include "viraccessapicheckqemu.h" #include "virpolkit.h" +#include "virthreadjob.h" #define VIR_FROM_THIS VIR_FROM_RPC diff --git a/src/locking/lock_daemon_dispatch.c b/src/locking/lock_daemon_dispatch.c index 168a6af..a7cee9d 100644 --- a/src/locking/lock_daemon_dispatch.c +++ b/src/locking/lock_daemon_dispatch.c @@ -29,6 +29,7 @@ #include "lock_daemon.h" #include "lock_protocol.h" #include "virerror.h" +#include "virthreadjob.h" #define VIR_FROM_THIS VIR_FROM_RPC diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c index 949bf8b..559400c 100644 --- a/src/qemu/qemu_domain.c +++ b/src/qemu/qemu_domain.c @@ -39,6 +39,7 @@ #include "virtime.h" #include "virstoragefile.h" #include "virstring.h" +#include "virthreadjob.h" #include "storage/storage_driver.h" diff --git a/src/rpc/gendispatch.pl b/src/rpc/gendispatch.pl index 8b488eb..aa73d0c 100755 --- a/src/rpc/gendispatch.pl +++ b/src/rpc/gendispatch.pl @@ -428,8 +428,10 @@ elsif ($mode eq "server") { print " void *args$argann,\n"; print " void *ret$retann)\n"; print "{\n"; + print " int rv;\n"; + print " virThreadJobSet(\"$name\");\n"; print " VIR_DEBUG(\"server=%p client=%p msg=%p rerr=%p args=%p ret=%p\", server, client, msg, rerr, args, ret);\n"; - print " return $name(server, client, msg, rerr"; + print " rv = $name(server, client, msg, rerr"; if ($argtype ne "void") { print ", args"; } @@ -437,6 +439,8 @@ elsif ($mode eq "server") { print ", ret"; } print ");\n"; + print " virThreadJobClear(rv);\n"; + print " return rv;\n"; print "}\n"; # Finally we print out the dispatcher method body impl -- 2.3.3

This is very helpful when we want to log and report why we could not acquire a state change lock. Reporting what job keeps it locked helps with understanding the issue. Moreover, after calling virDomainGetControlInfo, it's possible to tell whether libvirt is just stuck somewhere within the API (or it just forgot to cleanup the job) or whether libvirt is waiting for QEMU to reply. The error message will look like the following: # virsh resume cd error: Failed to resume domain cd error: Timed out during operation: cannot acquire state change lock (held by remoteDispatchDomainSuspend) https://bugzilla.redhat.com/show_bug.cgi?id=853839 Signed-off-by: Jiri Denemark <jdenemar@redhat.com> --- src/qemu/qemu_domain.c | 43 +++++++++++++++++++++++++++++++++---------- src/qemu/qemu_domain.h | 2 ++ 2 files changed, 35 insertions(+), 10 deletions(-) diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c index 559400c..dc55016 100644 --- a/src/qemu/qemu_domain.c +++ b/src/qemu/qemu_domain.c @@ -153,6 +153,7 @@ qemuDomainObjResetJob(qemuDomainObjPrivatePtr priv) job->active = QEMU_JOB_NONE; job->owner = 0; + job->ownerAPI = NULL; } static void @@ -162,6 +163,7 @@ qemuDomainObjResetAsyncJob(qemuDomainObjPrivatePtr priv) job->asyncJob = QEMU_ASYNC_JOB_NONE; job->asyncOwner = 0; + job->asyncOwnerAPI = NULL; job->phase = 0; job->mask = QEMU_JOB_DEFAULT_MASK; job->dump_memory_only = false; @@ -1322,6 +1324,7 @@ qemuDomainObjBeginJobInternal(virQEMUDriverPtr driver, unsigned long long then; bool nested = job == QEMU_JOB_ASYNC_NESTED; virQEMUDriverConfigPtr cfg = virQEMUDriverGetConfig(driver); + const char *blocker = NULL; int ret = -1; VIR_DEBUG("Starting %s: %s (async=%s vm=%p name=%s)", @@ -1370,6 +1373,7 @@ qemuDomainObjBeginJobInternal(virQEMUDriverPtr driver, obj, obj->def->name); priv->job.active = job; priv->job.owner = virThreadSelfID(); + priv->job.ownerAPI = virThreadJobGet(); } else { VIR_DEBUG("Started async job: %s (vm=%p name=%s)", qemuDomainAsyncJobTypeToString(asyncJob), @@ -1379,6 +1383,7 @@ qemuDomainObjBeginJobInternal(virQEMUDriverPtr driver, goto cleanup; priv->job.asyncJob = asyncJob; priv->job.asyncOwner = virThreadSelfID(); + priv->job.asyncOwnerAPI = virThreadJobGet(); priv->job.current->started = now; } @@ -1389,29 +1394,47 @@ qemuDomainObjBeginJobInternal(virQEMUDriverPtr driver, return 0; error: - VIR_WARN("Cannot start job (%s, %s) for domain %s;" - " current job is (%s, %s) owned by (%llu, %llu)", + VIR_WARN("Cannot start job (%s, %s) for domain %s; " + "current job is (%s, %s) owned by (%llu %s, %llu %s)", qemuDomainJobTypeToString(job), qemuDomainAsyncJobTypeToString(asyncJob), obj->def->name, qemuDomainJobTypeToString(priv->job.active), qemuDomainAsyncJobTypeToString(priv->job.asyncJob), - priv->job.owner, priv->job.asyncOwner); + priv->job.owner, NULLSTR(priv->job.ownerAPI), + priv->job.asyncOwner, NULLSTR(priv->job.asyncOwnerAPI)); + + if (nested || qemuDomainNestedJobAllowed(priv, job)) + blocker = priv->job.ownerAPI; + else + blocker = priv->job.asyncOwnerAPI; ret = -1; if (errno == ETIMEDOUT) { - virReportError(VIR_ERR_OPERATION_TIMEOUT, - "%s", _("cannot acquire state change lock")); + if (blocker) { + virReportError(VIR_ERR_OPERATION_TIMEOUT, + _("cannot acquire state change lock (held by %s)"), + blocker); + } else { + virReportError(VIR_ERR_OPERATION_TIMEOUT, "%s", + _("cannot acquire state change lock")); + } ret = -2; } else if (cfg->maxQueuedJobs && priv->jobs_queued > cfg->maxQueuedJobs) { - virReportError(VIR_ERR_OPERATION_FAILED, - "%s", _("cannot acquire state change lock " - "due to max_queued limit")); + if (blocker) { + virReportError(VIR_ERR_OPERATION_FAILED, + _("cannot acquire state change lock (held by %s) " + "due to max_queued limit"), + blocker); + } else { + virReportError(VIR_ERR_OPERATION_FAILED, "%s", + _("cannot acquire state change lock " + "due to max_queued limit")); + } ret = -2; } else { - virReportSystemError(errno, - "%s", _("cannot acquire job mutex")); + virReportSystemError(errno, "%s", _("cannot acquire job mutex")); } cleanup: diff --git a/src/qemu/qemu_domain.h b/src/qemu/qemu_domain.h index ba8d398..a468982 100644 --- a/src/qemu/qemu_domain.h +++ b/src/qemu/qemu_domain.h @@ -117,10 +117,12 @@ struct qemuDomainJobObj { virCond cond; /* Use to coordinate jobs */ qemuDomainJob active; /* Currently running job */ unsigned long long owner; /* Thread id which set current job */ + const char *ownerAPI; /* The API which owns the job */ virCond asyncCond; /* Use to coordinate with async jobs */ qemuDomainAsyncJob asyncJob; /* Currently active async job */ unsigned long long asyncOwner; /* Thread which set current async job */ + const char *asyncOwnerAPI; /* The API which owns the async job */ int phase; /* Job phase (mainly for migrations) */ unsigned long long mask; /* Jobs allowed during async job */ bool dump_memory_only; /* use dump-guest-memory to do dump */ -- 2.3.3

Whenever we fail to acquire a job, we can report how long ago it was locked by another API. https://bugzilla.redhat.com/show_bug.cgi?id=853839 Signed-off-by: Jiri Denemark <jdenemar@redhat.com> --- src/qemu/qemu_domain.c | 20 ++++++++++++++++++-- src/qemu/qemu_domain.h | 2 ++ 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c index dc55016..72b1b84 100644 --- a/src/qemu/qemu_domain.c +++ b/src/qemu/qemu_domain.c @@ -154,6 +154,7 @@ qemuDomainObjResetJob(qemuDomainObjPrivatePtr priv) job->active = QEMU_JOB_NONE; job->owner = 0; job->ownerAPI = NULL; + job->started = 0; } static void @@ -164,6 +165,7 @@ qemuDomainObjResetAsyncJob(qemuDomainObjPrivatePtr priv) job->asyncJob = QEMU_ASYNC_JOB_NONE; job->asyncOwner = 0; job->asyncOwnerAPI = NULL; + job->asyncStarted = 0; job->phase = 0; job->mask = QEMU_JOB_DEFAULT_MASK; job->dump_memory_only = false; @@ -1326,6 +1328,8 @@ qemuDomainObjBeginJobInternal(virQEMUDriverPtr driver, virQEMUDriverConfigPtr cfg = virQEMUDriverGetConfig(driver); const char *blocker = NULL; int ret = -1; + unsigned long long duration = 0; + unsigned long long asyncDuration = 0; VIR_DEBUG("Starting %s: %s (async=%s vm=%p name=%s)", job == QEMU_JOB_ASYNC ? "async job" : "job", @@ -1366,6 +1370,8 @@ qemuDomainObjBeginJobInternal(virQEMUDriverPtr driver, qemuDomainObjResetJob(priv); + ignore_value(virTimeMillisNow(&now)); + if (job != QEMU_JOB_ASYNC) { VIR_DEBUG("Started job: %s (async=%s vm=%p name=%s)", qemuDomainJobTypeToString(job), @@ -1374,6 +1380,7 @@ qemuDomainObjBeginJobInternal(virQEMUDriverPtr driver, priv->job.active = job; priv->job.owner = virThreadSelfID(); priv->job.ownerAPI = virThreadJobGet(); + priv->job.started = now; } else { VIR_DEBUG("Started async job: %s (vm=%p name=%s)", qemuDomainAsyncJobTypeToString(asyncJob), @@ -1384,6 +1391,7 @@ qemuDomainObjBeginJobInternal(virQEMUDriverPtr driver, priv->job.asyncJob = asyncJob; priv->job.asyncOwner = virThreadSelfID(); priv->job.asyncOwnerAPI = virThreadJobGet(); + priv->job.asyncStarted = now; priv->job.current->started = now; } @@ -1394,15 +1402,23 @@ qemuDomainObjBeginJobInternal(virQEMUDriverPtr driver, return 0; error: + ignore_value(virTimeMillisNow(&now)); + if (priv->job.active && priv->job.started) + duration = now - priv->job.started; + if (priv->job.asyncJob && priv->job.asyncStarted) + asyncDuration = now - priv->job.asyncStarted; + VIR_WARN("Cannot start job (%s, %s) for domain %s; " - "current job is (%s, %s) owned by (%llu %s, %llu %s)", + "current job is (%s, %s) owned by (%llu %s, %llu %s) " + "for (%llus, %llus)", qemuDomainJobTypeToString(job), qemuDomainAsyncJobTypeToString(asyncJob), obj->def->name, qemuDomainJobTypeToString(priv->job.active), qemuDomainAsyncJobTypeToString(priv->job.asyncJob), priv->job.owner, NULLSTR(priv->job.ownerAPI), - priv->job.asyncOwner, NULLSTR(priv->job.asyncOwnerAPI)); + priv->job.asyncOwner, NULLSTR(priv->job.asyncOwnerAPI), + duration / 1000, asyncDuration / 1000); if (nested || qemuDomainNestedJobAllowed(priv, job)) blocker = priv->job.ownerAPI; diff --git a/src/qemu/qemu_domain.h b/src/qemu/qemu_domain.h index a468982..9da80ca 100644 --- a/src/qemu/qemu_domain.h +++ b/src/qemu/qemu_domain.h @@ -118,11 +118,13 @@ struct qemuDomainJobObj { qemuDomainJob active; /* Currently running job */ unsigned long long owner; /* Thread id which set current job */ const char *ownerAPI; /* The API which owns the job */ + unsigned long long started; /* When the current job started */ virCond asyncCond; /* Use to coordinate with async jobs */ qemuDomainAsyncJob asyncJob; /* Currently active async job */ unsigned long long asyncOwner; /* Thread which set current async job */ const char *asyncOwnerAPI; /* The API which owns the async job */ + unsigned long long asyncStarted; /* When the current async job started */ int phase; /* Job phase (mainly for migrations) */ unsigned long long mask; /* Jobs allowed during async job */ bool dump_memory_only; /* use dump-guest-memory to do dump */ -- 2.3.3

On 03/23/2015 09:25 AM, Jiri Denemark wrote:
https://bugzilla.redhat.com/show_bug.cgi?id=853839
Jiri Denemark (8): POTFILES.in: Sort Add support for tracking thread jobs Force usage of virThreadCreate virThread: Set thread job virThreadPool: Set thread worker name Set thread job for every RPC call qemu: Track the API which started the current job qemu: Add timing to domain jobs
cfg.mk | 9 +++ daemon/remote.c | 1 + include/libvirt/virterror.h | 1 + po/POTFILES.in | 5 +- src/Makefile.am | 2 + src/libvirt_private.syms | 11 +++- src/locking/lock_daemon_dispatch.c | 1 + src/nwfilter/nwfilter_learnipaddr.c | 15 ++--- src/nwfilter/nwfilter_learnipaddr.h | 1 - src/qemu/qemu_domain.c | 60 ++++++++++++++--- src/qemu/qemu_domain.h | 4 ++ src/rpc/gendispatch.pl | 6 +- src/util/virerror.c | 1 + src/util/virthread.c | 25 +++++-- src/util/virthread.h | 13 ++-- src/util/virthreadjob.c | 126 ++++++++++++++++++++++++++++++++++++ src/util/virthreadjob.h | 33 ++++++++++ src/util/virthreadpool.c | 44 ++++++++----- src/util/virthreadpool.h | 14 ++-- 19 files changed, 317 insertions(+), 55 deletions(-) create mode 100644 src/util/virthreadjob.c create mode 100644 src/util/virthreadjob.h
Looks fine to me... Couple of 'overall' comments... * Clean run through my Coverity checker. * New modules add copyright for 2013-2015 - shouldn't that just be 2015 * Other modules touched haven't had their copyrights adjusted... I don't always remember either - perhaps everyone needs eblake's emacs macro to auto update when you edit a file. ACK series John

On Tue, Mar 24, 2015 at 13:50:39 -0400, John Ferlan wrote:
On 03/23/2015 09:25 AM, Jiri Denemark wrote:
https://bugzilla.redhat.com/show_bug.cgi?id=853839
Jiri Denemark (8): POTFILES.in: Sort Add support for tracking thread jobs Force usage of virThreadCreate virThread: Set thread job virThreadPool: Set thread worker name Set thread job for every RPC call qemu: Track the API which started the current job qemu: Add timing to domain jobs
cfg.mk | 9 +++ daemon/remote.c | 1 + include/libvirt/virterror.h | 1 + po/POTFILES.in | 5 +- src/Makefile.am | 2 + src/libvirt_private.syms | 11 +++- src/locking/lock_daemon_dispatch.c | 1 + src/nwfilter/nwfilter_learnipaddr.c | 15 ++--- src/nwfilter/nwfilter_learnipaddr.h | 1 - src/qemu/qemu_domain.c | 60 ++++++++++++++--- src/qemu/qemu_domain.h | 4 ++ src/rpc/gendispatch.pl | 6 +- src/util/virerror.c | 1 + src/util/virthread.c | 25 +++++-- src/util/virthread.h | 13 ++-- src/util/virthreadjob.c | 126 ++++++++++++++++++++++++++++++++++++ src/util/virthreadjob.h | 33 ++++++++++ src/util/virthreadpool.c | 44 ++++++++----- src/util/virthreadpool.h | 14 ++-- 19 files changed, 317 insertions(+), 55 deletions(-) create mode 100644 src/util/virthreadjob.c create mode 100644 src/util/virthreadjob.h
Looks fine to me... Couple of 'overall' comments...
* Clean run through my Coverity checker.
Nice.
* New modules add copyright for 2013-2015 - shouldn't that just be 2015
That's the period during which the file was created :-) Higher priority things always appeared and prevented me from finishing this...
* Other modules touched haven't had their copyrights adjusted... I don't always remember either - perhaps everyone needs eblake's emacs macro to auto update when you edit a file.
I don't use emacs and even if I did I wouldn't use the macro anyway :-) I think copyright years are just useless.
ACK series
Thanks, pushed. Jirka
participants (2)
-
Jiri Denemark
-
John Ferlan