[libvirt] [PATCH 1/2] Add a parameter to virThreadPoolSendJob() to let the caller decide whether to wait for the job to complete

--- src/qemu/qemu_driver.c | 2 +- src/util/threadpool.c | 19 ++++++++++++++++++- src/util/threadpool.h | 3 ++- 3 files changed, 21 insertions(+), 3 deletions(-) diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c index 924446f..aa2e805 100644 --- a/src/qemu/qemu_driver.c +++ b/src/qemu/qemu_driver.c @@ -665,7 +665,7 @@ qemuHandleDomainWatchdog(qemuMonitorPtr mon ATTRIBUTE_UNUSED, if (VIR_ALLOC(wdEvent) == 0) { wdEvent->action = VIR_DOMAIN_WATCHDOG_ACTION_DUMP; wdEvent->vm = vm; - ignore_value(virThreadPoolSendJob(driver->workerPool, wdEvent)); + ignore_value(virThreadPoolSendJob(driver->workerPool, wdEvent, false)); } else virReportOOMError(); } diff --git a/src/util/threadpool.c b/src/util/threadpool.c index 1213862..07f2fcf 100644 --- a/src/util/threadpool.c +++ b/src/util/threadpool.c @@ -42,6 +42,7 @@ struct _virThreadPoolJob { virThreadPoolJobPtr next; void *data; + virCondPtr complete; }; typedef struct _virThreadPoolJobList virThreadPoolJobList; @@ -73,6 +74,7 @@ struct _virThreadPool { static void virThreadPoolWorker(void *opaque) { virThreadPoolPtr pool = opaque; + virCondPtr complete; virMutexLock(&pool->mutex); @@ -97,9 +99,12 @@ static void virThreadPoolWorker(void *opaque) pool->jobList.tail = &pool->jobList.head; virMutexUnlock(&pool->mutex); + complete = job->complete; (pool->jobFunc)(job->data, pool->jobOpaque); VIR_FREE(job); virMutexLock(&pool->mutex); + if (complete) + virCondSignal(complete); } out: @@ -188,9 +193,14 @@ void virThreadPoolFree(virThreadPoolPtr pool) } int virThreadPoolSendJob(virThreadPoolPtr pool, - void *jobData) + void *jobData, + bool waitForCompletion) { virThreadPoolJobPtr job; + virCond complete; + + if (waitForCompletion && virCondInit(&complete) < 0) + return -1; virMutexLock(&pool->mutex); if (pool->quit) @@ -219,10 +229,17 @@ int virThreadPoolSendJob(virThreadPoolPtr pool, job->data = jobData; job->next = NULL; + job->complete = NULL; *pool->jobList.tail = job; pool->jobList.tail = &(*pool->jobList.tail)->next; virCondSignal(&pool->cond); + + if (waitForCompletion) { + job->complete = &complete; + virCondWait(&complete, &pool->mutex); + } + virMutexUnlock(&pool->mutex); return 0; diff --git a/src/util/threadpool.h b/src/util/threadpool.h index 5714b0b..6f763dc 100644 --- a/src/util/threadpool.h +++ b/src/util/threadpool.h @@ -41,7 +41,8 @@ virThreadPoolPtr virThreadPoolNew(size_t minWorkers, void virThreadPoolFree(virThreadPoolPtr pool); int virThreadPoolSendJob(virThreadPoolPtr pool, - void *jobdata) ATTRIBUTE_NONNULL(1) + void *jobdata, + bool waitForCompletion) ATTRIBUTE_NONNULL(1) ATTRIBUTE_NONNULL(2) ATTRIBUTE_RETURN_CHECK; -- 1.7.3.1 -- Thanks, Hu Tao

--- src/remote/remote_driver.c | 2 +- tools/virsh.c | 126 ++++++++++++++++++++++++++++++++++++++------ 2 files changed, 110 insertions(+), 18 deletions(-) diff --git a/src/remote/remote_driver.c b/src/remote/remote_driver.c index ee2de4a..d24d54f 100644 --- a/src/remote/remote_driver.c +++ b/src/remote/remote_driver.c @@ -10122,7 +10122,7 @@ remoteIOEventLoop(virConnectPtr conn, repoll: ret = poll(fds, ARRAY_CARDINALITY(fds), -1); - if (ret < 0 && errno == EAGAIN) + if (ret < 0 && (errno == EAGAIN || errno == EINTR)) goto repoll; #ifdef HAVE_PTHREAD_SIGMASK diff --git a/tools/virsh.c b/tools/virsh.c index 8c123bb..efc7d1a 100644 --- a/tools/virsh.c +++ b/tools/virsh.c @@ -54,6 +54,9 @@ #include "files.h" #include "../daemon/event.h" #include "configmake.h" +#include "threads.h" +#include "threadpool.h" +#include "datatypes.h" static char *progname; @@ -208,6 +211,7 @@ typedef struct __vshCmd { typedef struct __vshControl { char *name; /* connection name */ virConnectPtr conn; /* connection to hypervisor (MAY BE NULL) */ + virDomainPtr dom; vshCmd *cmd; /* the current command */ char *cmdstr; /* string with command */ int imode; /* interactive mode? */ @@ -221,6 +225,9 @@ typedef struct __vshControl { int log_fd; /* log file descriptor */ char *historydir; /* readline history directory name */ char *historyfile; /* readline history file name */ + + virMutex mutex; + virThreadPoolPtr threadPool; } __vshControl; typedef struct vshCmdGrp { @@ -229,6 +236,7 @@ typedef struct vshCmdGrp { const vshCmdDef *commands; } vshCmdGrp; +static vshControl _ctl; static const vshCmdGrp cmdGroups[]; static void vshError(vshControl *ctl, const char *format, ...) @@ -258,6 +266,7 @@ static long long vshCommandOptLongLong(const vshCmd *cmd, const char *name, int *found); static int vshCommandOptBool(const vshCmd *cmd, const char *name); static char *vshCommandOptArgv(const vshCmd *cmd, int count); +static int vshCommandStringParse(vshControl *ctl, char *cmdstr); #define VSH_BYID (1 << 1) #define VSH_BYUUID (1 << 2) @@ -509,6 +518,44 @@ static void vshCatchDisconnect(int sig, siginfo_t * siginfo, if ((sig == SIGPIPE) || (siginfo->si_signo == SIGPIPE)) disconnected++; } +#endif + +#ifdef SIGINT +static void vshCatchInt(int sig, siginfo_t *siginfo, + void *context ATTRIBUTE_UNUSED) { + vshControl *ctl = &_ctl; + vshCmd *cmds, *c; + char *cmdStr = NULL; + + virMutexLock(&ctl->mutex); + if (!ctl->dom) + goto error; + + if (virAsprintf(&cmdStr, "domjobabort %s", ctl->dom->name) < 0) + goto error; + + if ((sig == SIGINT) || (siginfo->si_signo == SIGINT)) { + if (!vshConnectionUsability (ctl, ctl->conn)) + goto error; + + vshCommandStringParse(ctl, cmdStr); + cmds = ctl->cmd; + ctl->cmd = NULL; + virMutexUnlock(&ctl->mutex); + do { + c = cmds; + cmds = cmds->next; + c->next = NULL; + ignore_value(virThreadPoolSendJob(ctl->threadPool, c, true)); + } while (cmds); + } + VIR_FREE(cmdStr); + return; +error: + virMutexUnlock(&ctl->mutex); + VIR_FREE(cmdStr); +} +#endif /* * vshSetupSignals: @@ -520,17 +567,20 @@ static void vshSetupSignals(void) { struct sigaction sig_action; - sig_action.sa_sigaction = vshCatchDisconnect; sig_action.sa_flags = SA_SIGINFO; sigemptyset(&sig_action.sa_mask); +#ifdef SIGPIPE + sig_action.sa_sigaction = vshCatchDisconnect; sigaction(SIGPIPE, &sig_action, NULL); -} -#else -static void -vshSetupSignals(void) {} #endif +#ifdef SIGINT + sig_action.sa_sigaction = vshCatchInt; + sigaction(SIGINT, &sig_action, NULL); +#endif +} + /* * vshReconnect: * @@ -1848,6 +1898,10 @@ cmdDump(vshControl *ctl, const vshCmd *cmd) if (!(dom = vshCommandOptDomain(ctl, cmd, &name))) return FALSE; + virMutexLock(&ctl->mutex); + ctl->dom = dom; + virMutexUnlock(&ctl->mutex); + if (vshCommandOptBool (cmd, "live")) flags |= VIR_DUMP_LIVE; if (vshCommandOptBool (cmd, "crash")) @@ -1860,6 +1914,9 @@ cmdDump(vshControl *ctl, const vshCmd *cmd) ret = FALSE; } + virMutexLock(&ctl->mutex); + ctl->dom = NULL; + virMutexUnlock(&ctl->mutex); virDomainFree(dom); return ret; } @@ -3396,6 +3453,9 @@ cmdMigrate (vshControl *ctl, const vshCmd *cmd) if (!(dom = vshCommandOptDomain (ctl, cmd, NULL))) return FALSE; + virMutexLock(&ctl->mutex); + ctl->dom = dom; + virMutexUnlock(&ctl->mutex); desturi = vshCommandOptString (cmd, "desturi", &found); if (!found) goto done; @@ -3454,6 +3514,9 @@ cmdMigrate (vshControl *ctl, const vshCmd *cmd) } done: + virMutexLock(&ctl->mutex); + ctl->dom = NULL; + virMutexUnlock(&ctl->mutex); if (dom) virDomainFree (dom); return ret; } @@ -10837,12 +10900,15 @@ vshCommandOptSecret(vshControl *ctl, const vshCmd *cmd, char **name) /* * Executes command(s) and returns return code from last command */ -static int -vshCommandRun(vshControl *ctl, const vshCmd *cmd) +static void +vshCommandRun(void *jobdata, void *opaque) { + vshControl *ctl = opaque; + vshCmd *cmds = jobdata; + vshCmd *cmd; int ret = TRUE; - while (cmd) { + while (cmds) { struct timeval before, after; bool enable_timing = ctl->timing; @@ -10852,6 +10918,9 @@ vshCommandRun(vshControl *ctl, const vshCmd *cmd) if (enable_timing) GETTIMEOFDAY(&before); + cmd = cmds; + cmds = cmds->next; + ret = cmd->def->handler(ctl, cmd); if (enable_timing) @@ -10871,17 +10940,12 @@ vshCommandRun(vshControl *ctl, const vshCmd *cmd) (last_error->code == VIR_ERR_INVALID_CONN))))) vshReconnect(ctl); - if (STREQ(cmd->def->name, "quit")) /* hack ... */ - return ret; - if (enable_timing) vshPrint(ctl, _("\n(Time: %.3f ms)\n\n"), DIFF_MSEC(&after, &before)); else vshPrintExtra(ctl, "\n"); - cmd = cmd->next; } - return ret; } /* --------------- @@ -11284,6 +11348,9 @@ vshInit(vshControl *ctl) if (ctl->conn) return FALSE; + if (virMutexInit(&ctl->mutex) < 0) + return FALSE; + vshOpenLogFile(ctl); /* set up the library error handler */ @@ -11315,6 +11382,12 @@ vshInit(vshControl *ctl) return FALSE; } + ctl->threadPool = virThreadPoolNew(0, 2, vshCommandRun, ctl); + if (!ctl->threadPool) { + return FALSE; + } + + return TRUE; } @@ -11669,6 +11742,7 @@ vshReadline (vshControl *ctl, const char *prompt) static int vshDeinit(vshControl *ctl) { + virThreadPoolFree(ctl->threadPool); vshReadlineDeinit(ctl); vshCloseLogFile(ctl); VIR_FREE(ctl->name); @@ -11938,7 +12012,7 @@ vshParseArgv(vshControl *ctl, int argc, char **argv) int main(int argc, char **argv) { - vshControl _ctl, *ctl = &_ctl; + vshControl *ctl = &_ctl; char *defaultConn; int ret = TRUE; @@ -11979,7 +12053,16 @@ main(int argc, char **argv) } if (!ctl->imode) { - ret = vshCommandRun(ctl, ctl->cmd); + vshCmd *cmds, *c; + + cmds = ctl->cmd; + ctl->cmd = NULL; + do { + c = cmds; + cmds = cmds->next; + c->next = NULL; + ignore_value(virThreadPoolSendJob(ctl->threadPool, c, true)); + } while (cmds); } else { /* interactive mode */ if (!ctl->quiet) { @@ -11997,6 +12080,7 @@ main(int argc, char **argv) } do { + vshCmd *cmds, *c; const char *prompt = ctl->readonly ? VSH_PROMPT_RO : VSH_PROMPT_RW; ctl->cmdstr = vshReadline(ctl, prompt); @@ -12006,8 +12090,16 @@ main(int argc, char **argv) #if USE_READLINE add_history(ctl->cmdstr); #endif - if (vshCommandStringParse(ctl, ctl->cmdstr)) - vshCommandRun(ctl, ctl->cmd); + if (vshCommandStringParse(ctl, ctl->cmdstr)) { + cmds = ctl->cmd; + ctl->cmd = NULL; + do { + c = cmds; + cmds = cmds->next; + c->next = NULL; + ignore_value(virThreadPoolSendJob(ctl->threadPool, c, true)); + } while (cmds); + } } VIR_FREE(ctl->cmdstr); } while (ctl->imode); -- 1.7.3.1 -- Thanks, Hu Tao

On 12/23/2010 02:07 AM, Hu Tao wrote:
---
No commit message beyond the title?
@@ -208,6 +211,7 @@ typedef struct __vshCmd { typedef struct __vshControl { char *name; /* connection name */ virConnectPtr conn; /* connection to hypervisor (MAY BE NULL) */ + virDomainPtr dom;
No comment as to it's purpose?
vshCmd *cmd; /* the current command */ char *cmdstr; /* string with command */ int imode; /* interactive mode? */ @@ -221,6 +225,9 @@ typedef struct __vshControl { int log_fd; /* log file descriptor */ char *historydir; /* readline history directory name */ char *historyfile; /* readline history file name */ + + virMutex mutex; + virThreadPoolPtr threadPool;
Likewise.
@@ -509,6 +518,44 @@ static void vshCatchDisconnect(int sig, siginfo_t * siginfo, if ((sig == SIGPIPE) || (siginfo->si_signo == SIGPIPE)) disconnected++; } +#endif + +#ifdef SIGINT
Unnecessary ifdef - SIGINT is available on all platforms (it is required by C).
+static void vshCatchInt(int sig, siginfo_t *siginfo, + void *context ATTRIBUTE_UNUSED) { + vshControl *ctl = &_ctl; + vshCmd *cmds, *c; + char *cmdStr = NULL; + + virMutexLock(&ctl->mutex); + if (!ctl->dom) + goto error; + + if (virAsprintf(&cmdStr, "domjobabort %s", ctl->dom->name) < 0)
_BIG_ no-no. virAsprintf calls malloc(), which is NOT signal-safe (that is, if the signal arrived while some other thread was in the middle of a malloc(), you've just caused deadlock). The only safe thing to do in a signal handler is to set some state variables that are then checked in the main processing loop, when you are back in a safe state to act on a (minimally-delayed) basis on the signal. See how daemon/libvirtd.c uses qemudDispatchSignalEvent (although that's being rewritten by danpb's factorization into a cleaner rpc server/client model).
+ goto error; + + if ((sig == SIGINT) || (siginfo->si_signo == SIGINT)) { + if (!vshConnectionUsability (ctl, ctl->conn)) + goto error; + + vshCommandStringParse(ctl, cmdStr); + cmds = ctl->cmd; + ctl->cmd = NULL; + virMutexUnlock(&ctl->mutex); + do { + c = cmds; + cmds = cmds->next; + c->next = NULL; + ignore_value(virThreadPoolSendJob(ctl->threadPool, c, true));
Continuing in the vein of bad practice - blocking on another thread to complete inside a signal handler is crazy. But setting up a threadpool to handle signals in a separate thread means that your signal-handling thread is already independently available to cancel the job that is underway in the primary thread.
+ } while (cmds); + } + VIR_FREE(cmdStr); + return; +error: + virMutexUnlock(&ctl->mutex); + VIR_FREE(cmdStr); +} +#endif
/* * vshSetupSignals: @@ -520,17 +567,20 @@ static void vshSetupSignals(void) { struct sigaction sig_action;
- sig_action.sa_sigaction = vshCatchDisconnect; sig_action.sa_flags = SA_SIGINFO; sigemptyset(&sig_action.sa_mask);
+#ifdef SIGPIPE
Might be unnecessary - gnulib guarantees that a working replacement for SIGPIPE is available on mingw, which is the only platform that lacks it natively. Oh, but that module is currently LGPLv3+, I'll have to ask on the gnulib list if people are willing to relax it to LGPLv2+ so we can use it. -- Eric Blake eblake@redhat.com +1-801-349-2682 Libvirt virtualization library http://libvirt.org

On Thu, Dec 23, 2010 at 09:05:58AM -0700, Eric Blake wrote:
On 12/23/2010 02:07 AM, Hu Tao wrote:
---
No commit message beyond the title?
@@ -208,6 +211,7 @@ typedef struct __vshCmd { typedef struct __vshControl { char *name; /* connection name */ virConnectPtr conn; /* connection to hypervisor (MAY BE NULL) */ + virDomainPtr dom;
No comment as to it's purpose?
Will update it if I update the patch.
vshCmd *cmd; /* the current command */ char *cmdstr; /* string with command */ int imode; /* interactive mode? */ @@ -221,6 +225,9 @@ typedef struct __vshControl { int log_fd; /* log file descriptor */ char *historydir; /* readline history directory name */ char *historyfile; /* readline history file name */ + + virMutex mutex; + virThreadPoolPtr threadPool;
Likewise.
Will update it if I update the patch.
@@ -509,6 +518,44 @@ static void vshCatchDisconnect(int sig, siginfo_t * siginfo, if ((sig == SIGPIPE) || (siginfo->si_signo == SIGPIPE)) disconnected++; } +#endif + +#ifdef SIGINT
Unnecessary ifdef - SIGINT is available on all platforms (it is required by C).
+static void vshCatchInt(int sig, siginfo_t *siginfo, + void *context ATTRIBUTE_UNUSED) { + vshControl *ctl = &_ctl; + vshCmd *cmds, *c; + char *cmdStr = NULL; + + virMutexLock(&ctl->mutex); + if (!ctl->dom) + goto error; + + if (virAsprintf(&cmdStr, "domjobabort %s", ctl->dom->name) < 0)
_BIG_ no-no. virAsprintf calls malloc(), which is NOT signal-safe (that is, if the signal arrived while some other thread was in the middle of a malloc(), you've just caused deadlock). The only safe thing to do in a signal handler is to set some state variables that are then checked in the main processing loop, when you are back in a safe state to act on a (minimally-delayed) basis on the signal.
See how daemon/libvirtd.c uses qemudDispatchSignalEvent (although that's being rewritten by danpb's factorization into a cleaner rpc server/client model).
+ goto error; + + if ((sig == SIGINT) || (siginfo->si_signo == SIGINT)) { + if (!vshConnectionUsability (ctl, ctl->conn)) + goto error; + + vshCommandStringParse(ctl, cmdStr); + cmds = ctl->cmd; + ctl->cmd = NULL; + virMutexUnlock(&ctl->mutex); + do { + c = cmds; + cmds = cmds->next; + c->next = NULL; + ignore_value(virThreadPoolSendJob(ctl->threadPool, c, true));
Continuing in the vein of bad practice - blocking on another thread to complete inside a signal handler is crazy. But setting up a threadpool
I did a copy&paste and forgot to change true to false :)
to handle signals in a separate thread means that your signal-handling thread is already independently available to cancel the job that is underway in the primary thread.
The intention of setting up a threadpool is to submit another command in a seprate thread while there is already a command being processed. If we do it in one thread, then we will end up with deaklock eventually. Take dump as example: 1. submit command dump 2. polling for reply from RPC server 3. SIGINT(poll is interrupted) 4. submit command domjobabort form signal handler 5. (in remoteIO())there is a priv->waitDispatch, so go to sleep(vifCondWait). The impl of remote driver is thread-aware, and this thread (processing command domjobabort) is supposed to be waken up by the thread that is having priv->waitDispatch(processing command dump). But we are the same thread, so deadlock. Well, I have to say that the patch is nasty. Do you (or anyone) have a good idea to achieve the goal to cancel the active job if user presses Ctrl-C? Thanks.
+ } while (cmds); + } + VIR_FREE(cmdStr); + return; +error: + virMutexUnlock(&ctl->mutex); + VIR_FREE(cmdStr); +} +#endif
/* * vshSetupSignals: @@ -520,17 +567,20 @@ static void vshSetupSignals(void) { struct sigaction sig_action;
- sig_action.sa_sigaction = vshCatchDisconnect; sig_action.sa_flags = SA_SIGINFO; sigemptyset(&sig_action.sa_mask);
+#ifdef SIGPIPE
Might be unnecessary - gnulib guarantees that a working replacement for SIGPIPE is available on mingw, which is the only platform that lacks it natively. Oh, but that module is currently LGPLv3+, I'll have to ask on the gnulib list if people are willing to relax it to LGPLv2+ so we can use it.
-- Eric Blake eblake@redhat.com +1-801-349-2682 Libvirt virtualization library http://libvirt.org
-- Thanks, Hu Tao

On Thu, Dec 23, 2010 at 05:07:34PM +0800, Hu Tao wrote:
--- src/remote/remote_driver.c | 2 +- tools/virsh.c | 126 ++++++++++++++++++++++++++++++++++++++------ 2 files changed, 110 insertions(+), 18 deletions(-)
diff --git a/src/remote/remote_driver.c b/src/remote/remote_driver.c index ee2de4a..d24d54f 100644 --- a/src/remote/remote_driver.c +++ b/src/remote/remote_driver.c @@ -10122,7 +10122,7 @@ remoteIOEventLoop(virConnectPtr conn,
repoll: ret = poll(fds, ARRAY_CARDINALITY(fds), -1); - if (ret < 0 && errno == EAGAIN) + if (ret < 0 && (errno == EAGAIN || errno == EINTR)) goto repoll;
I'm not convinced this is a good change. It makes it impossible to interrupt libvirt clients when they're doing remote I/O. Yes, you're adding special SIGINT handling to virsh to provide an alternative way to stop things, but this only applies to migration/dump. So a user won't be able to Ctrl-C virsh on other commands, nor for any non-virsh applications. In essence, this reverts the previous fix: commit 47fec8eac2bb38246addb0a0cc368fdbadad4738 Author: Daniel Veillard <veillard@redhat.com> Date: Fri Oct 30 12:08:26 2009 +0100 Remote code caught EINTR making it ininterruptable John Levon raised the issue that remoteIOEventLoop() poll call was reissued after EINTR was caught making it uninterruptible.
#ifdef HAVE_PTHREAD_SIGMASK diff --git a/tools/virsh.c b/tools/virsh.c index 8c123bb..efc7d1a 100644 --- a/tools/virsh.c +++ b/tools/virsh.c @@ -54,6 +54,9 @@ #include "files.h" #include "../daemon/event.h" #include "configmake.h" +#include "threads.h" +#include "threadpool.h" +#include "datatypes.h"
static char *progname;
@@ -208,6 +211,7 @@ typedef struct __vshCmd { typedef struct __vshControl { char *name; /* connection name */ virConnectPtr conn; /* connection to hypervisor (MAY BE NULL) */ + virDomainPtr dom; vshCmd *cmd; /* the current command */ char *cmdstr; /* string with command */ int imode; /* interactive mode? */ @@ -221,6 +225,9 @@ typedef struct __vshControl { int log_fd; /* log file descriptor */ char *historydir; /* readline history directory name */ char *historyfile; /* readline history file name */ + + virMutex mutex; + virThreadPoolPtr threadPool; } __vshControl;
I'm not really understanding why we need to introduce all this thread pool complexity just to be able to catch Ctrl-C and run virJobAbort ? If we already have a thread running to handle migration, monitoring progress info, then that existing thread could just check a global variable to see whether a sigint has occurred. Regards, Daniel

While migration/dump is in progress and virsh is waiting for its completion, user may want to terminate the progress by pressing Ctrl-C. But virsh just exits on user's Ctrl-C leaving migration/dump in background that user isn't even aware of. It's not reasonable. This patch changes the behaviour for migration/dump. For other commands Ctrl-C still terminates virsh itself. --- Hi Daniel, Eric, This patch is entirely different from my previous implementation so not titled with v2. It's simpler than the previous one and introduces less changes: not introducing threadpool in virsh;Ctrl-C remains terminating virsh if no job in progress. Thanks for your review of the previous patch. src/remote/remote_driver.c | 9 +++++++-- tools/virsh.c | 37 +++++++++++++++++++++++++++++++++++-- 2 files changed, 42 insertions(+), 4 deletions(-) diff --git a/src/remote/remote_driver.c b/src/remote/remote_driver.c index ee2de4a..59ec486 100644 --- a/src/remote/remote_driver.c +++ b/src/remote/remote_driver.c @@ -9810,7 +9810,7 @@ processCallDispatchReply(virConnectPtr conn ATTRIBUTE_UNUSED, remoteError(VIR_ERR_RPC, _("no call waiting for reply with serial %d"), hdr->serial); - return -1; + return -2; } if (hdr->proc != thecall->proc_nr) { @@ -10160,7 +10160,12 @@ remoteIOEventLoop(virConnectPtr conn, } if (fds[0].revents & POLLIN) { - if (remoteIOHandleInput(conn, priv, flags) < 0) + int ret = remoteIOHandleInput(conn, priv, flags); + if (ret == -2) { + /* Not expected result, repoll */ + remoteDriverUnlock(priv); + goto repoll; + } else if (ret < 0) goto error; } diff --git a/tools/virsh.c b/tools/virsh.c index 55e2a68..e4d431e 100644 --- a/tools/virsh.c +++ b/tools/virsh.c @@ -492,6 +492,15 @@ out: last_error = NULL; } +static bool intCatched = FALSE; + +static void vshCatchInt(int sig ATTRIBUTE_UNUSED, + siginfo_t *siginfo ATTRIBUTE_UNUSED, + void *context ATTRIBUTE_UNUSED) +{ + intCatched = TRUE; +} + /* * Detection of disconnections and automatic reconnection support */ @@ -1838,6 +1847,9 @@ cmdDump(vshControl *ctl, const vshCmd *cmd) char *to; int ret = TRUE; int flags = 0; + int result; + struct sigaction sig_action; + struct sigaction old_sig_action; if (!vshConnectionUsability(ctl, ctl->conn)) return FALSE; @@ -1848,18 +1860,27 @@ cmdDump(vshControl *ctl, const vshCmd *cmd) if (!(dom = vshCommandOptDomain(ctl, cmd, &name))) return FALSE; + sig_action.sa_sigaction = vshCatchInt; + sig_action.sa_flags = SA_SIGINFO; + sigemptyset(&sig_action.sa_mask); + sigaction(SIGINT, &sig_action, &old_sig_action); + if (vshCommandOptBool (cmd, "live")) flags |= VIR_DUMP_LIVE; if (vshCommandOptBool (cmd, "crash")) flags |= VIR_DUMP_CRASH; - if (virDomainCoreDump(dom, to, flags) == 0) { + result = virDomainCoreDump(dom, to, flags); + if (result == 0) { vshPrint(ctl, _("Domain %s dumped to %s\n"), name, to); + } else if (intCatched) { + virDomainAbortJob(dom); } else { vshError(ctl, _("Failed to core dump domain %s to %s"), name, to); ret = FALSE; } + sigaction(SIGINT, &old_sig_action, NULL); virDomainFree(dom); return ret; } @@ -3388,6 +3409,9 @@ cmdMigrate (vshControl *ctl, const vshCmd *cmd) const char *desturi; const char *migrateuri; const char *dname; + int result; + struct sigaction sig_action; + struct sigaction old_sig_action; int flags = 0, found, ret = FALSE; if (!vshConnectionUsability (ctl, ctl->conn)) @@ -3396,6 +3420,11 @@ cmdMigrate (vshControl *ctl, const vshCmd *cmd) if (!(dom = vshCommandOptDomain (ctl, cmd, NULL))) return FALSE; + sig_action.sa_sigaction = vshCatchInt; + sig_action.sa_flags = SA_SIGINFO; + sigemptyset(&sig_action.sa_mask); + sigaction(SIGINT, &sig_action, &old_sig_action); + desturi = vshCommandOptString (cmd, "desturi", &found); if (!found) goto done; @@ -3437,6 +3466,8 @@ cmdMigrate (vshControl *ctl, const vshCmd *cmd) if (virDomainMigrateToURI (dom, desturi, flags, dname, 0) == 0) ret = TRUE; + else if (intCatched) + virDomainAbortJob(dom); } else { /* For traditional live migration, connect to the destination host directly. */ virConnectPtr dconn = NULL; @@ -3449,11 +3480,13 @@ cmdMigrate (vshControl *ctl, const vshCmd *cmd) if (ddom) { virDomainFree(ddom); ret = TRUE; - } + } else if (intCatched) + virDomainAbortJob(dom); virConnectClose (dconn); } done: + sigaction(SIGINT, &old_sig_action, NULL); if (dom) virDomainFree (dom); return ret; } -- 1.7.3.1 -- Thanks, Hu Tao

On Thu, Jan 06, 2011 at 05:16:53PM +0800, Hu Tao wrote:
While migration/dump is in progress and virsh is waiting for its completion, user may want to terminate the progress by pressing Ctrl-C. But virsh just exits on user's Ctrl-C leaving migration/dump in background that user isn't even aware of. It's not reasonable.
This patch changes the behaviour for migration/dump. For other commands Ctrl-C still terminates virsh itself. --- Hi Daniel, Eric,
This patch is entirely different from my previous implementation so not titled with v2. It's simpler than the previous one and introduces less changes: not introducing threadpool in virsh;Ctrl-C remains terminating virsh if no job in progress.
Thanks for your review of the previous patch.
src/remote/remote_driver.c | 9 +++++++-- tools/virsh.c | 37 +++++++++++++++++++++++++++++++++++-- 2 files changed, 42 insertions(+), 4 deletions(-)
diff --git a/src/remote/remote_driver.c b/src/remote/remote_driver.c index ee2de4a..59ec486 100644 --- a/src/remote/remote_driver.c +++ b/src/remote/remote_driver.c @@ -9810,7 +9810,7 @@ processCallDispatchReply(virConnectPtr conn ATTRIBUTE_UNUSED, remoteError(VIR_ERR_RPC, _("no call waiting for reply with serial %d"), hdr->serial); - return -1; + return -2; }
if (hdr->proc != thecall->proc_nr) { @@ -10160,7 +10160,12 @@ remoteIOEventLoop(virConnectPtr conn, }
if (fds[0].revents & POLLIN) { - if (remoteIOHandleInput(conn, priv, flags) < 0) + int ret = remoteIOHandleInput(conn, priv, flags); + if (ret == -2) { + /* Not expected result, repoll */ + remoteDriverUnlock(priv); + goto repoll; + } else if (ret < 0) goto error; }
I don't think this change is correct. The error scenario shown in the first chunk is one that is hit when you have malformed data on the wire. As such I don't believe it is safe to treat this as a non-fatal recoverable error. Hitting Ctrl-C may well cause this path to behave as you desire, but there are other non-Ctrl-C based scenarios in which is it incorrect. While I think the threadpool stuff was overkill, we do still need 1 extra thread and I think the solution / patch overlaps with the solution/patch proposed by Wen Congyang a few days ago. eg, we can create a setup that looks like this which I think will solve both your own & Wen's needs for migration in virsh. * Thread a (Runs the migration) pthread_sigmask(mask with SIGINT blocked) virDomainMigrate() pthread_sigmask(original mask) * Thread b (Monitors/controls the migration) gettimeofday(start) while (1) { gettimeofday(now) if (now - start) > timeout force guest to offline migrate if intCatched == TRUE abort migrate if --verbose pthread_sigmask(mask with SIGINT blocked) virDomainGetJobInfo() pthread_sigmask(original mask) print out progress info on console } Since thread a blocks Ctrl-C, the signal will get delivered to thread b instead, possibly delayed a little if in the (short) virDomainGetJobInfo call. Thus we don't need any changes to remote_driver.c at all, and virsh can handle Ctrl-C on its own.
diff --git a/tools/virsh.c b/tools/virsh.c index 55e2a68..e4d431e 100644 --- a/tools/virsh.c +++ b/tools/virsh.c @@ -492,6 +492,15 @@ out: last_error = NULL; }
+static bool intCatched = FALSE; + +static void vshCatchInt(int sig ATTRIBUTE_UNUSED, + siginfo_t *siginfo ATTRIBUTE_UNUSED, + void *context ATTRIBUTE_UNUSED) +{ + intCatched = TRUE; +} + /* * Detection of disconnections and automatic reconnection support */ @@ -1838,6 +1847,9 @@ cmdDump(vshControl *ctl, const vshCmd *cmd) char *to; int ret = TRUE; int flags = 0; + int result; + struct sigaction sig_action; + struct sigaction old_sig_action;
if (!vshConnectionUsability(ctl, ctl->conn)) return FALSE; @@ -1848,18 +1860,27 @@ cmdDump(vshControl *ctl, const vshCmd *cmd) if (!(dom = vshCommandOptDomain(ctl, cmd, &name))) return FALSE;
+ sig_action.sa_sigaction = vshCatchInt; + sig_action.sa_flags = SA_SIGINFO; + sigemptyset(&sig_action.sa_mask); + sigaction(SIGINT, &sig_action, &old_sig_action); + if (vshCommandOptBool (cmd, "live")) flags |= VIR_DUMP_LIVE; if (vshCommandOptBool (cmd, "crash")) flags |= VIR_DUMP_CRASH;
- if (virDomainCoreDump(dom, to, flags) == 0) { + result = virDomainCoreDump(dom, to, flags); + if (result == 0) { vshPrint(ctl, _("Domain %s dumped to %s\n"), name, to); + } else if (intCatched) { + virDomainAbortJob(dom); } else { vshError(ctl, _("Failed to core dump domain %s to %s"), name, to); ret = FALSE; }
+ sigaction(SIGINT, &old_sig_action, NULL); virDomainFree(dom); return ret; } @@ -3388,6 +3409,9 @@ cmdMigrate (vshControl *ctl, const vshCmd *cmd) const char *desturi; const char *migrateuri; const char *dname; + int result; + struct sigaction sig_action; + struct sigaction old_sig_action; int flags = 0, found, ret = FALSE;
if (!vshConnectionUsability (ctl, ctl->conn)) @@ -3396,6 +3420,11 @@ cmdMigrate (vshControl *ctl, const vshCmd *cmd) if (!(dom = vshCommandOptDomain (ctl, cmd, NULL))) return FALSE;
+ sig_action.sa_sigaction = vshCatchInt; + sig_action.sa_flags = SA_SIGINFO; + sigemptyset(&sig_action.sa_mask); + sigaction(SIGINT, &sig_action, &old_sig_action); + desturi = vshCommandOptString (cmd, "desturi", &found); if (!found) goto done; @@ -3437,6 +3466,8 @@ cmdMigrate (vshControl *ctl, const vshCmd *cmd)
if (virDomainMigrateToURI (dom, desturi, flags, dname, 0) == 0) ret = TRUE; + else if (intCatched) + virDomainAbortJob(dom); } else { /* For traditional live migration, connect to the destination host directly. */ virConnectPtr dconn = NULL; @@ -3449,11 +3480,13 @@ cmdMigrate (vshControl *ctl, const vshCmd *cmd) if (ddom) { virDomainFree(ddom); ret = TRUE; - } + } else if (intCatched) + virDomainAbortJob(dom); virConnectClose (dconn); }
done: + sigaction(SIGINT, &old_sig_action, NULL); if (dom) virDomainFree (dom); return ret; }
Regards, Daniel

While migration is in progress and virsh is waiting for its completion, user may want to terminate the progress by pressing Ctrl-C. But virsh just exits on user's Ctrl-C leaving migration in background that user isn't even aware of. It's not reasonable. This patch lets migration be terminated if user presses Ctrl-C while migration is in progress. virsh's behaviour is not affected when executing other commands. --- Hi Daniel, This patch does what you described(but lack of progress info). The timeout thing can be added later if this patch is acceptable. tools/virsh.c | 119 +++++++++++++++++++++++++++++++++++++++++++++++++++----- 1 files changed, 108 insertions(+), 11 deletions(-) diff --git a/tools/virsh.c b/tools/virsh.c index 55e2a68..cc9f26a 100644 --- a/tools/virsh.c +++ b/tools/virsh.c @@ -31,6 +31,7 @@ #include <sys/stat.h> #include <inttypes.h> #include <signal.h> +#include <poll.h> #include <libxml/parser.h> #include <libxml/tree.h> @@ -54,6 +55,7 @@ #include "files.h" #include "../daemon/event.h" #include "configmake.h" +#include "threads.h" static char *progname; @@ -492,6 +494,15 @@ out: last_error = NULL; } +static bool intCatched = FALSE; + +static void vshCatchInt(int sig ATTRIBUTE_UNUSED, + siginfo_t *siginfo ATTRIBUTE_UNUSED, + void *context ATTRIBUTE_UNUSED) +{ + intCatched = TRUE; +} + /* * Detection of disconnections and automatic reconnection support */ @@ -3381,24 +3392,38 @@ static const vshCmdOptDef opts_migrate[] = { {NULL, 0, 0, NULL} }; -static int -cmdMigrate (vshControl *ctl, const vshCmd *cmd) +static void +doMigrate (void *opaque) { + char ret = '1'; virDomainPtr dom = NULL; const char *desturi; const char *migrateuri; const char *dname; - int flags = 0, found, ret = FALSE; + int flags = 0, found; + sigset_t sigmask, oldsigmask; + struct { + vshControl *ctl; + vshCmd *cmd; + int writefd; + } *data = opaque; + vshControl *ctl = data->ctl; + vshCmd *cmd = data->cmd; + + sigemptyset(&sigmask); + sigaddset(&sigmask, SIGINT); + if (pthread_sigmask(SIG_BLOCK, &sigmask, &oldsigmask) < 0) + goto out_sig; if (!vshConnectionUsability (ctl, ctl->conn)) - return FALSE; + goto out; if (!(dom = vshCommandOptDomain (ctl, cmd, NULL))) - return FALSE; + goto out; desturi = vshCommandOptString (cmd, "desturi", &found); if (!found) - goto done; + goto out; migrateuri = vshCommandOptString (cmd, "migrateuri", NULL); @@ -3432,29 +3457,101 @@ cmdMigrate (vshControl *ctl, const vshCmd *cmd) if (migrateuri != NULL) { vshError(ctl, "%s", _("migrate: Unexpected migrateuri for peer2peer/direct migration")); - goto done; + goto out; } if (virDomainMigrateToURI (dom, desturi, flags, dname, 0) == 0) - ret = TRUE; + ret = '0'; } else { /* For traditional live migration, connect to the destination host directly. */ virConnectPtr dconn = NULL; virDomainPtr ddom = NULL; dconn = virConnectOpenAuth (desturi, virConnectAuthPtrDefault, 0); - if (!dconn) goto done; + if (!dconn) goto out; ddom = virDomainMigrate (dom, dconn, flags, dname, migrateuri, 0); if (ddom) { virDomainFree(ddom); - ret = TRUE; + ret = '0'; } virConnectClose (dconn); } - done: +out: + pthread_sigmask(SIG_BLOCK, &oldsigmask, NULL); +out_sig: if (dom) virDomainFree (dom); + ignore_value(safewrite(data->writefd, &ret, sizeof(ret))); +} + +static int +cmdMigrate (vshControl *ctl, const vshCmd *cmd) +{ + int p[2]; + virThread workerThread; + struct pollfd pollfd; + int ret; + char retchar; + struct sigaction sig_action; + struct sigaction old_sig_action; + + struct { + vshControl *ctl; + vshCmd *cmd; + int writefd; + } data; + + if (pipe(p) < 0) + return -1; + + data.ctl = ctl; + data.cmd = cmd; + data.writefd = p[1]; + + if (virThreadCreate(&workerThread, + true, + doMigrate, + &data) < 0) + return -1; + + intCatched = FALSE; + sig_action.sa_sigaction = vshCatchInt; + sig_action.sa_flags = SA_SIGINFO; + sigemptyset(&sig_action.sa_mask); + sigaction(SIGINT, &sig_action, &old_sig_action); + + pollfd.fd = p[0]; + pollfd.events = POLLIN; + pollfd.revents = 0; + + ret = poll(&pollfd, 1, -1); + if (ret > 0) { + if (saferead(p[0], &retchar, sizeof(retchar)) > 0) { + if (retchar == '0') + ret = 0; + else + ret = -1; + } else + ret = -1; + } else if (ret < 0) { + if (errno == EINTR && intCatched) { + virDomainPtr dom; + + if ((dom = vshCommandOptDomain (ctl, cmd, NULL))) { + virDomainAbortJob(dom); + virDomainFree(dom); + } + intCatched = FALSE; + } + } else { + /* timed out */ + ret = -1; + } + + sigaction(SIGINT, &old_sig_action, NULL); + + virThreadJoin(&workerThread); return ret; } -- 1.7.3.1 -- Thanks, Hu Tao

On 12/23/2010 02:07 AM, Hu Tao wrote:
---
No commit message besides the title?
@@ -188,9 +193,14 @@ void virThreadPoolFree(virThreadPoolPtr pool) }
int virThreadPoolSendJob(virThreadPoolPtr pool, - void *jobData) + void *jobData, + bool waitForCompletion) {
This doesn't make sense to me. If you know you want to wait for a function to finish, then call the function directly rather than creating a job and passing it off to another thread just to block on that thread.
+++ b/src/util/threadpool.h @@ -41,7 +41,8 @@ virThreadPoolPtr virThreadPoolNew(size_t minWorkers, void virThreadPoolFree(virThreadPoolPtr pool);
int virThreadPoolSendJob(virThreadPoolPtr pool, - void *jobdata) ATTRIBUTE_NONNULL(1) + void *jobdata, + bool waitForCompletion) ATTRIBUTE_NONNULL(1) ATTRIBUTE_NONNULL(2) ATTRIBUTE_RETURN_CHECK;
Whether or not you can convince me why this patch is necessary, there's an independent bug - jobdata is supposed to be opaque, which means it is acceptable to pass NULL as a jobdata, so we should remove the ATTRIBUTE_NONNULL(2). -- Eric Blake eblake@redhat.com +1-801-349-2682 Libvirt virtualization library http://libvirt.org

Do nothing if caller passes NULL as jobdata to virThreadPoolSendJob(). --- src/util/threadpool.c | 3 +++ src/util/threadpool.h | 1 - 2 files changed, 3 insertions(+), 1 deletions(-) diff --git a/src/util/threadpool.c b/src/util/threadpool.c index 1213862..2907478 100644 --- a/src/util/threadpool.c +++ b/src/util/threadpool.c @@ -192,6 +192,9 @@ int virThreadPoolSendJob(virThreadPoolPtr pool, { virThreadPoolJobPtr job; + if (!jobData) + return -1; + virMutexLock(&pool->mutex); if (pool->quit) goto error; diff --git a/src/util/threadpool.h b/src/util/threadpool.h index 5714b0b..8b8c676 100644 --- a/src/util/threadpool.h +++ b/src/util/threadpool.h @@ -42,7 +42,6 @@ void virThreadPoolFree(virThreadPoolPtr pool); int virThreadPoolSendJob(virThreadPoolPtr pool, void *jobdata) ATTRIBUTE_NONNULL(1) - ATTRIBUTE_NONNULL(2) ATTRIBUTE_RETURN_CHECK; #endif -- 1.7.3.1 -- Thanks, Hu Tao

On 12/26/2010 11:29 PM, Hu Tao wrote:
Do nothing if caller passes NULL as jobdata to virThreadPoolSendJob(). --- src/util/threadpool.c | 3 +++ src/util/threadpool.h | 1 - 2 files changed, 3 insertions(+), 1 deletions(-)
diff --git a/src/util/threadpool.c b/src/util/threadpool.c index 1213862..2907478 100644 --- a/src/util/threadpool.c +++ b/src/util/threadpool.c @@ -192,6 +192,9 @@ int virThreadPoolSendJob(virThreadPoolPtr pool, { virThreadPoolJobPtr job;
+ if (!jobData) + return -1; +
This hunk is wrong. jobData is opaque - we cannot assume anything about it's meaning. A job may very well have something to do even with a NULL job data, and refusing to run a job just because one of its two opaque arguments is NULL is no better than marking the parameter as ATTRIBUTE_NONNULL when registering the job.
+++ b/src/util/threadpool.h @@ -42,7 +42,6 @@ void virThreadPoolFree(virThreadPoolPtr pool);
int virThreadPoolSendJob(virThreadPoolPtr pool, void *jobdata) ATTRIBUTE_NONNULL(1) - ATTRIBUTE_NONNULL(2) ATTRIBUTE_RETURN_CHECK;
But this hunk is okay. I've applied just this hunk, as it is technically a bug fix (over-strict compilation markings), and minimal impact for inclusion in 0.8.7. -- Eric Blake eblake@redhat.com +1-801-349-2682 Libvirt virtualization library http://libvirt.org
participants (3)
-
Daniel P. Berrange
-
Eric Blake
-
Hu Tao