[libvirt] [PATCH 0/2] Two simple sparse streams fixes

I've been experimenting with sparse streams and found a bug. If you try to download a volume which doesn't support sparseness here's what happens: # virsh vol-download --sparse /dev/disk/by-path/ip-XX.XX.XX.XX:3260-iscsi-iqn.2017-03.com.blah:server-lun-0 /mnt/floppy/blah.raw # echo $? 0 # ls -lhs /mnt/floppy/bla.raw 0 -rw-r--r-- 1 root root 0 May 30 12:40 /mnt/floppy/bla.raw That's not good. iSCSI doesn't know anything about sparseness so an error is expected here. Fortunately, the fix is fairly simple: # virsh vol-download --sparse /dev/disk/by-path/ip-XX.XX.XX.XX:3260-iscsi-iqn.2017-03.com.blah:server-lun-0 /mnt/floppy/bla.raw error: cannot close volume /dev/disk/by-path/ip-XX.XX.XX.XX:3260-iscsi-iqn.2017-03.com.blah:server-lun-0 error: Unable to seek to data: Invalid argument Michal Privoznik (2): virfdstream: Check for thread error more frequently fdstream: Report error from the I/O thread daemon/stream.c | 18 ++++++++++++------ src/util/virfdstream.c | 22 ++++++++++++++++++++-- 2 files changed, 32 insertions(+), 8 deletions(-) -- 2.13.0

When the I/O thread quits (e.g. due to an I/O error, lseek() error, whatever), any subsequent virFDStream API should return error too. Moreover, when invoking stream event callback, we must set the VIR_STREAM_EVENT_ERROR flag so that the callback knows something bad happened. Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/util/virfdstream.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/src/util/virfdstream.c b/src/util/virfdstream.c index 7ee58be13..ebd0f6cf1 100644 --- a/src/util/virfdstream.c +++ b/src/util/virfdstream.c @@ -312,6 +312,9 @@ static void virFDStreamEvent(int watch ATTRIBUTE_UNUSED, return; } + if (fdst->threadErr) + events |= VIR_STREAM_EVENT_ERROR; + cb = fdst->cb; cbopaque = fdst->opaque; ff = fdst->ff; @@ -764,6 +767,9 @@ static int virFDStreamWrite(virStreamPtr st, const char *bytes, size_t nbytes) return -1; } + if (fdst->threadErr) + return -1; + if (!fdst) { virReportError(VIR_ERR_INTERNAL_ERROR, "%s", _("stream is not open")); @@ -844,6 +850,9 @@ static int virFDStreamRead(virStreamPtr st, char *bytes, size_t nbytes) return -1; } + if (fdst->threadErr) + return -1; + if (!fdst) { virReportError(VIR_ERR_INTERNAL_ERROR, "%s", _("stream is not open")); @@ -959,6 +968,9 @@ virFDStreamSendHole(virStreamPtr st, fdst->offset += length; } + if (fdst->threadErr) + goto cleanup; + if (fdst->thread) { /* Things are a bit complicated here. If FDStream is in a * read mode, then if the message at the queue head is @@ -1018,6 +1030,9 @@ virFDStreamInData(virStreamPtr st, virObjectLock(fdst); + if (fdst->threadErr) + goto cleanup; + if (fdst->thread) { virFDStreamMsgPtr msg; -- 2.13.0

On Tue, May 30, 2017 at 12:44:22PM +0200, Michal Privoznik wrote:
When the I/O thread quits (e.g. due to an I/O error, lseek() error, whatever), any subsequent virFDStream API should return error too. Moreover, when invoking stream event callback, we must set the VIR_STREAM_EVENT_ERROR flag so that the callback knows something bad happened.
Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/util/virfdstream.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+)
diff --git a/src/util/virfdstream.c b/src/util/virfdstream.c index 7ee58be13..ebd0f6cf1 100644 --- a/src/util/virfdstream.c +++ b/src/util/virfdstream.c @@ -312,6 +312,9 @@ static void virFDStreamEvent(int watch ATTRIBUTE_UNUSED, return; }
+ if (fdst->threadErr) + events |= VIR_STREAM_EVENT_ERROR; + cb = fdst->cb; cbopaque = fdst->opaque; ff = fdst->ff; @@ -764,6 +767,9 @@ static int virFDStreamWrite(virStreamPtr st, const char *bytes, size_t nbytes) return -1; }
+ if (fdst->threadErr) + return -1; +
It feels like this should be done after locking the object.
if (!fdst) {
Not mentioning it looks like it can be NULL before this check.
virReportError(VIR_ERR_INTERNAL_ERROR, "%s", _("stream is not open")); @@ -844,6 +850,9 @@ static int virFDStreamRead(virStreamPtr st, char *bytes, size_t nbytes) return -1; }
+ if (fdst->threadErr) + return -1; + if (!fdst) {
Same here. I have no iSCSI to test it with, but it looks OK otherwise.

Problem with our error reporting is that the error object is a thread local variable. That means if there's an error reported within the I/O thread it gets logged and everything, but later when the event loop aborts the stream it doesn't see the original error. So we are left with some generic error. We can do better if we copy the error message between the threads. Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- daemon/stream.c | 18 ++++++++++++------ src/util/virfdstream.c | 9 ++++++--- 2 files changed, 18 insertions(+), 9 deletions(-) diff --git a/daemon/stream.c b/daemon/stream.c index 1d5b50ad7..5077ac8b0 100644 --- a/daemon/stream.c +++ b/daemon/stream.c @@ -231,17 +231,23 @@ daemonStreamEvent(virStreamPtr st, int events, void *opaque) int ret; virNetMessagePtr msg; virNetMessageError rerr; + virErrorPtr origErr = virSaveLastError(); memset(&rerr, 0, sizeof(rerr)); stream->closed = true; virStreamEventRemoveCallback(stream->st); virStreamAbort(stream->st); - if (events & VIR_STREAM_EVENT_HANGUP) - virReportError(VIR_ERR_RPC, - "%s", _("stream had unexpected termination")); - else - virReportError(VIR_ERR_RPC, - "%s", _("stream had I/O failure")); + if (origErr && origErr->code != VIR_ERR_OK) { + virSetError(origErr); + virFreeError(origErr); + } else { + if (events & VIR_STREAM_EVENT_HANGUP) + virReportError(VIR_ERR_RPC, + "%s", _("stream had unexpected termination")); + else + virReportError(VIR_ERR_RPC, + "%s", _("stream had I/O failure")); + } msg = virNetMessageNew(false); if (!msg) { diff --git a/src/util/virfdstream.c b/src/util/virfdstream.c index ebd0f6cf1..c1dbe0800 100644 --- a/src/util/virfdstream.c +++ b/src/util/virfdstream.c @@ -106,7 +106,7 @@ struct virFDStreamData { /* Thread data */ virThreadPtr thread; virCond threadCond; - int threadErr; + virErrorPtr threadErr; bool threadQuit; bool threadAbort; bool threadDoRead; @@ -123,6 +123,7 @@ virFDStreamDataDispose(void *obj) virFDStreamDataPtr fdst = obj; VIR_DEBUG("obj=%p", fdst); + virFreeError(fdst->threadErr); virFDStreamMsgQueueFree(&fdst->msg); } @@ -312,8 +313,10 @@ static void virFDStreamEvent(int watch ATTRIBUTE_UNUSED, return; } - if (fdst->threadErr) + if (fdst->threadErr) { events |= VIR_STREAM_EVENT_ERROR; + virSetError(fdst->threadErr); + } cb = fdst->cb; cbopaque = fdst->opaque; @@ -637,7 +640,7 @@ virFDStreamThread(void *opaque) return; error: - fdst->threadErr = errno; + fdst->threadErr = virSaveLastError(); goto cleanup; } -- 2.13.0

On Tue, May 30, 2017 at 12:44:21PM +0200, Michal Privoznik wrote:
I've been experimenting with sparse streams and found a bug. If you try to download a volume which doesn't support sparseness here's what happens:
# virsh vol-download --sparse /dev/disk/by-path/ip-XX.XX.XX.XX:3260-iscsi-iqn.2017-03.com.blah:server-lun-0 /mnt/floppy/blah.raw
# echo $? 0 # ls -lhs /mnt/floppy/bla.raw 0 -rw-r--r-- 1 root root 0 May 30 12:40 /mnt/floppy/bla.raw
That's not good. iSCSI doesn't know anything about sparseness so an error is expected here. Fortunately, the fix is fairly simple:
# virsh vol-download --sparse /dev/disk/by-path/ip-XX.XX.XX.XX:3260-iscsi-iqn.2017-03.com.blah:server-lun-0 /mnt/floppy/bla.raw error: cannot close volume /dev/disk/by-path/ip-XX.XX.XX.XX:3260-iscsi-iqn.2017-03.com.blah:server-lun-0 error: Unable to seek to data: Invalid argument
I'm also getting confusing errors when there is no space on the destination: error: cannot receive data from volume fedora.img error: An error occurred, but the cause is unknown But that's not related to the sparse streams (unless it was caused by making the iohelper a thread). ... few moments later after /me tries just a thing or two ... Well, this made me try out few more things and I've found out few things. I'm not sure what's related to your patches and what's not, so here's the rundown, and I'll let you decide: - vol-download --sparse --offset $source_file_size --length 1 /path/to/source.file destination.file - Every now and then (not always) it gets stuck waiting for the daemon to receive data (see backtrace below), but the daemon is not waiting for anything, it's just some weird race. We can try debugging it with wireshark later. That file ends with a hole. Thread 1 (Thread 0x7f1d2b434880 (LWP 28584)): #0 0x00007f1d2796efbd in poll () at ../sysdeps/unix/syscall-template.S:84 #1 0x00007f1d2a806ee3 in poll (__timeout=5000, __nfds=2, __fds=0x7ffe9effd640) at /usr/include/bits/poll2.h:46 #2 virNetClientIOEventLoop (client=client@entry=0x563525bb06d0, thiscall=thiscall@entry=0x563525badc00) at rpc/virnetclient.c:1664 #3 0x00007f1d2a8074d3 in virNetClientIO (client=client@entry=0x563525bb06d0, thiscall=0x563525badc00) at rpc/virnetclient.c:1957 #4 0x00007f1d2a80780e in virNetClientSendInternal (client=client@entry=0x563525bb06d0, msg=msg@entry=0x563525bb03d0, expectReply=expectReply@entry=true, nonBlock=nonBlock@entry=false) at rpc/virnetclient.c:2132 #5 0x00007f1d2a808dfc in virNetClientSendWithReplyStream (client=client@entry=0x563525bb06d0, msg=msg@entry=0x563525bb03d0, st=st@entry=0x563525bade10) at rpc/virnetclient.c:2236 #6 0x00007f1d2a80ab2d in virNetClientStreamRecvPacket (st=st@entry=0x563525bade10, client=0x563525bb06d0, data=data@entry=0x7f1d20686010 "", nbytes=nbytes@entry=262120, nonblock=false, flags=32766, flags@entry=1) at rpc/virnetclientstream.c:499 #7 0x00007f1d2a7e0e3e in remoteStreamRecvFlags (st=0x563525badc60, data=0x7f1d20686010 "", nbytes=262120, flags=1) at remote/remote_driver.c:5664 #8 0x00007f1d2a7c8347 in virStreamRecvFlags (stream=stream@entry=0x563525badc60, data=0x7f1d20686010 "", nbytes=nbytes@entry=262120, flags=flags@entry=1) at libvirt-stream.c:361 #9 0x00007f1d2a7c9b7f in virStreamSparseRecvAll (stream=stream@entry=0x563525badc60, handler=0x563525760196 <virshStreamSink>, holeHandler=0x56352576020b <virshStreamSkip>, opaque=opaque@entry=0x7ffe9effd954) at libvirt-stream.c:964 #10 0x000056352576232e in cmdVolDownload (ctl=0x7ffe9effda40, cmd=<optimized out>) at virsh-volume.c:834 #11 0x00005635257662f1 in vshCommandRun (ctl=0x7ffe9effda40, cmd=0x563525bacf40) at vsh.c:1327 #12 0x000056352572aee2 in main (argc=9, argv=<optimized out>) at virsh.c:929 Trying to reproduce yet another one, the command gets stuck even with different offsets. - vol-download --sparse --offset $X --length 1 /path/to/source.file destination.file - This does not respect the length if: X > $source_file_size - $last_hole_size The size ends up being $source_file_size - $X I'm afraid to try more things, but I can provide more info for these if you want. Have a nice day, Martin

On 05/31/2017 01:03 PM, Martin Kletzander wrote:
On Tue, May 30, 2017 at 12:44:21PM +0200, Michal Privoznik wrote:
I've been experimenting with sparse streams and found a bug. If you try to download a volume which doesn't support sparseness here's what happens:
# virsh vol-download --sparse /dev/disk/by-path/ip-XX.XX.XX.XX:3260-iscsi-iqn.2017-03.com.blah:server-lun-0 /mnt/floppy/blah.raw
# echo $? 0 # ls -lhs /mnt/floppy/bla.raw 0 -rw-r--r-- 1 root root 0 May 30 12:40 /mnt/floppy/bla.raw
That's not good. iSCSI doesn't know anything about sparseness so an error is expected here. Fortunately, the fix is fairly simple:
# virsh vol-download --sparse /dev/disk/by-path/ip-XX.XX.XX.XX:3260-iscsi-iqn.2017-03.com.blah:server-lun-0 /mnt/floppy/bla.raw error: cannot close volume /dev/disk/by-path/ip-XX.XX.XX.XX:3260-iscsi-iqn.2017-03.com.blah:server-lun-0
error: Unable to seek to data: Invalid argument
I'm also getting confusing errors when there is no space on the destination: error: cannot receive data from volume fedora.img error: An error occurred, but the cause is unknown
Looks like one of the callbacks is not reporting errors.
But that's not related to the sparse streams (unless it was caused by making the iohelper a thread).
... few moments later after /me tries just a thing or two ...
Well, this made me try out few more things and I've found out few things. I'm not sure what's related to your patches and what's not, so here's the rundown, and I'll let you decide:
- vol-download --sparse --offset $source_file_size --length 1 /path/to/source.file destination.file
- Every now and then (not always) it gets stuck waiting for the daemon to receive data (see backtrace below), but the daemon is not waiting for anything, it's just some weird race. We can try debugging it with wireshark later. That file ends with a hole.
Thread 1 (Thread 0x7f1d2b434880 (LWP 28584)): #0 0x00007f1d2796efbd in poll () at ../sysdeps/unix/syscall-template.S:84 #1 0x00007f1d2a806ee3 in poll (__timeout=5000, __nfds=2, __fds=0x7ffe9effd640) at /usr/include/bits/poll2.h:46 #2 virNetClientIOEventLoop (client=client@entry=0x563525bb06d0, thiscall=thiscall@entry=0x563525badc00) at rpc/virnetclient.c:1664 #3 0x00007f1d2a8074d3 in virNetClientIO (client=client@entry=0x563525bb06d0, thiscall=0x563525badc00) at rpc/virnetclient.c:1957 #4 0x00007f1d2a80780e in virNetClientSendInternal (client=client@entry=0x563525bb06d0, msg=msg@entry=0x563525bb03d0, expectReply=expectReply@entry=true, nonBlock=nonBlock@entry=false) at rpc/virnetclient.c:2132 #5 0x00007f1d2a808dfc in virNetClientSendWithReplyStream (client=client@entry=0x563525bb06d0, msg=msg@entry=0x563525bb03d0, st=st@entry=0x563525bade10) at rpc/virnetclient.c:2236 #6 0x00007f1d2a80ab2d in virNetClientStreamRecvPacket (st=st@entry=0x563525bade10, client=0x563525bb06d0, data=data@entry=0x7f1d20686010 "", nbytes=nbytes@entry=262120, nonblock=false, flags=32766, flags@entry=1) at rpc/virnetclientstream.c:499 #7 0x00007f1d2a7e0e3e in remoteStreamRecvFlags (st=0x563525badc60, data=0x7f1d20686010 "", nbytes=262120, flags=1) at remote/remote_driver.c:5664 #8 0x00007f1d2a7c8347 in virStreamRecvFlags (stream=stream@entry=0x563525badc60, data=0x7f1d20686010 "", nbytes=nbytes@entry=262120, flags=flags@entry=1) at libvirt-stream.c:361 #9 0x00007f1d2a7c9b7f in virStreamSparseRecvAll (stream=stream@entry=0x563525badc60, handler=0x563525760196 <virshStreamSink>, holeHandler=0x56352576020b <virshStreamSkip>, opaque=opaque@entry=0x7ffe9effd954) at libvirt-stream.c:964 #10 0x000056352576232e in cmdVolDownload (ctl=0x7ffe9effda40, cmd=<optimized out>) at virsh-volume.c:834 #11 0x00005635257662f1 in vshCommandRun (ctl=0x7ffe9effda40, cmd=0x563525bacf40) at vsh.c:1327 #12 0x000056352572aee2 in main (argc=9, argv=<optimized out>) at virsh.c:929
Trying to reproduce yet another one, the command gets stuck even with different offsets.
- vol-download --sparse --offset $X --length 1 /path/to/source.file destination.file
- This does not respect the length if: X > $source_file_size - $last_hole_size
The size ends up being $source_file_size - $X
Okay, I'll look into these. Thanks.
I'm afraid to try more things, but I can provide more info for these if you want.
Don't be! At least somebody is testing the feature. Thanks. Anyway, I'll send v2 on 1/2. Michal

On Wednesday, 31 May 2017 13:03:38 CEST Martin Kletzander wrote:
- vol-download --sparse --offset $source_file_size --length 1 /path/to/source.file destination.file
- Every now and then (not always) it gets stuck waiting for the daemon to receive data (see backtrace below), but the daemon is not waiting for anything, it's just some weird race. We can try debugging it with wireshark later. That file ends with a hole.
Thread 1 (Thread 0x7f1d2b434880 (LWP 28584)): #0 0x00007f1d2796efbd in poll () at ../sysdeps/unix/syscall-template.S:84 #1 0x00007f1d2a806ee3 in poll (__timeout=5000, __nfds=2, __fds=0x7ffe9effd640) at /usr/include/bits/poll2.h:46 #2 virNetClientIOEventLoop (client=client@entry=0x563525bb06d0, thiscall=thiscall@entry=0x563525badc00) at rpc/virnetclient.c:1664 #3 0x00007f1d2a8074d3 in virNetClientIO (client=client@entry=0x563525bb06d0, thiscall=0x563525badc00) at rpc/virnetclient.c:1957 #4 0x00007f1d2a80780e in virNetClientSendInternal (client=client@entry=0x563525bb06d0, msg=msg@entry=0x563525bb03d0, expectReply=expectReply@entry=true, nonBlock=nonBlock@entry=false) at rpc/virnetclient.c:2132 #5 0x00007f1d2a808dfc in virNetClientSendWithReplyStream (client=client@entry=0x563525bb06d0, msg=msg@entry=0x563525bb03d0, st=st@entry=0x563525bade10) at rpc/virnetclient.c:2236 #6 0x00007f1d2a80ab2d in virNetClientStreamRecvPacket (st=st@entry=0x563525bade10, client=0x563525bb06d0, data=data@entry=0x7f1d20686010 "", nbytes=nbytes@entry=262120, nonblock=false, flags=32766, flags@entry=1) at rpc/virnetclientstream.c:499 #7 0x00007f1d2a7e0e3e in remoteStreamRecvFlags (st=0x563525badc60, data=0x7f1d20686010 "", nbytes=262120, flags=1) at remote/remote_driver.c:5664 #8 0x00007f1d2a7c8347 in virStreamRecvFlags (stream=stream@entry=0x563525badc60, data=0x7f1d20686010 "", nbytes=nbytes@entry=262120, flags=flags@entry=1) at libvirt-stream.c:361 #9 0x00007f1d2a7c9b7f in virStreamSparseRecvAll (stream=stream@entry=0x563525badc60, handler=0x563525760196 <virshStreamSink>, holeHandler=0x56352576020b <virshStreamSkip>, opaque=opaque@entry=0x7ffe9effd954) at libvirt-stream.c:964 #10 0x000056352576232e in cmdVolDownload (ctl=0x7ffe9effda40, cmd=<optimized out>) at virsh-volume.c:834 #11 0x00005635257662f1 in vshCommandRun (ctl=0x7ffe9effda40, cmd=0x563525bacf40) at vsh.c:1327 #12 0x000056352572aee2 in main (argc=9, argv=<optimized out>) at virsh.c:929
Trying to reproduce yet another one, the command gets stuck even with different offsets.
- vol-download --sparse --offset $X --length 1 /path/to/source.file destination.file
- This does not respect the length if: X > $source_file_size - $last_hole_size
The size ends up being $source_file_size - $X
Humble suggestion here: what about turning the simple scenarios above as proper tests? -- Pino Toscano

On Wed, May 31, 2017 at 03:08:16PM +0200, Pino Toscano wrote:
On Wednesday, 31 May 2017 13:03:38 CEST Martin Kletzander wrote:
- vol-download --sparse --offset $source_file_size --length 1 /path/to/source.file destination.file
- Every now and then (not always) it gets stuck waiting for the daemon to receive data (see backtrace below), but the daemon is not waiting for anything, it's just some weird race. We can try debugging it with wireshark later. That file ends with a hole.
Thread 1 (Thread 0x7f1d2b434880 (LWP 28584)): #0 0x00007f1d2796efbd in poll () at ../sysdeps/unix/syscall-template.S:84 #1 0x00007f1d2a806ee3 in poll (__timeout=5000, __nfds=2, __fds=0x7ffe9effd640) at /usr/include/bits/poll2.h:46 #2 virNetClientIOEventLoop (client=client@entry=0x563525bb06d0, thiscall=thiscall@entry=0x563525badc00) at rpc/virnetclient.c:1664 #3 0x00007f1d2a8074d3 in virNetClientIO (client=client@entry=0x563525bb06d0, thiscall=0x563525badc00) at rpc/virnetclient.c:1957 #4 0x00007f1d2a80780e in virNetClientSendInternal (client=client@entry=0x563525bb06d0, msg=msg@entry=0x563525bb03d0, expectReply=expectReply@entry=true, nonBlock=nonBlock@entry=false) at rpc/virnetclient.c:2132 #5 0x00007f1d2a808dfc in virNetClientSendWithReplyStream (client=client@entry=0x563525bb06d0, msg=msg@entry=0x563525bb03d0, st=st@entry=0x563525bade10) at rpc/virnetclient.c:2236 #6 0x00007f1d2a80ab2d in virNetClientStreamRecvPacket (st=st@entry=0x563525bade10, client=0x563525bb06d0, data=data@entry=0x7f1d20686010 "", nbytes=nbytes@entry=262120, nonblock=false, flags=32766, flags@entry=1) at rpc/virnetclientstream.c:499 #7 0x00007f1d2a7e0e3e in remoteStreamRecvFlags (st=0x563525badc60, data=0x7f1d20686010 "", nbytes=262120, flags=1) at remote/remote_driver.c:5664 #8 0x00007f1d2a7c8347 in virStreamRecvFlags (stream=stream@entry=0x563525badc60, data=0x7f1d20686010 "", nbytes=nbytes@entry=262120, flags=flags@entry=1) at libvirt-stream.c:361 #9 0x00007f1d2a7c9b7f in virStreamSparseRecvAll (stream=stream@entry=0x563525badc60, handler=0x563525760196 <virshStreamSink>, holeHandler=0x56352576020b <virshStreamSkip>, opaque=opaque@entry=0x7ffe9effd954) at libvirt-stream.c:964 #10 0x000056352576232e in cmdVolDownload (ctl=0x7ffe9effda40, cmd=<optimized out>) at virsh-volume.c:834 #11 0x00005635257662f1 in vshCommandRun (ctl=0x7ffe9effda40, cmd=0x563525bacf40) at vsh.c:1327 #12 0x000056352572aee2 in main (argc=9, argv=<optimized out>) at virsh.c:929
Trying to reproduce yet another one, the command gets stuck even with different offsets.
- vol-download --sparse --offset $X --length 1 /path/to/source.file destination.file
- This does not respect the length if: X > $source_file_size - $last_hole_size
The size ends up being $source_file_size - $X
Humble suggestion here: what about turning the simple scenarios above as proper tests?
The problem here is that after designing the test and writing it, we also have to mock all accesses to the source and destination files and report how the result looks, etc. And I didn't get to virStreams even, that's only sparse files. We could instead do integration testing of this, which would be easier, however you can only do that on a filesystem that you know keeps holes, plus the hole sizes can be different based on the block size, the files can be way different based on adaptive allocations, etc. There are so many factors for this that it is not easy (I'm not saying it's impossible). If I had lot of free time, this could fit in somehow. Also after I upgrade the virfilewrapper, it will be easier to control the behaviour of the file-access functions way more delicately. But patches are welcome! ;)
-- Pino Toscano
-- libvir-list mailing list libvir-list@redhat.com https://www.redhat.com/mailman/listinfo/libvir-list

On 05/31/2017 01:03 PM, Martin Kletzander wrote:
On Tue, May 30, 2017 at 12:44:21PM +0200, Michal Privoznik wrote:
I've been experimenting with sparse streams and found a bug. If you try to download a volume which doesn't support sparseness here's what happens:
# virsh vol-download --sparse /dev/disk/by-path/ip-XX.XX.XX.XX:3260-iscsi-iqn.2017-03.com.blah:server-lun-0 /mnt/floppy/blah.raw
# echo $? 0 # ls -lhs /mnt/floppy/bla.raw 0 -rw-r--r-- 1 root root 0 May 30 12:40 /mnt/floppy/bla.raw
That's not good. iSCSI doesn't know anything about sparseness so an error is expected here. Fortunately, the fix is fairly simple:
# virsh vol-download --sparse /dev/disk/by-path/ip-XX.XX.XX.XX:3260-iscsi-iqn.2017-03.com.blah:server-lun-0 /mnt/floppy/bla.raw error: cannot close volume /dev/disk/by-path/ip-XX.XX.XX.XX:3260-iscsi-iqn.2017-03.com.blah:server-lun-0
error: Unable to seek to data: Invalid argument
I'm also getting confusing errors when there is no space on the destination: error: cannot receive data from volume fedora.img error: An error occurred, but the cause is unknown
I have a fix for this (will send shortly).
But that's not related to the sparse streams (unless it was caused by making the iohelper a thread).
... few moments later after /me tries just a thing or two ...
Well, this made me try out few more things and I've found out few things. I'm not sure what's related to your patches and what's not, so here's the rundown, and I'll let you decide:
- vol-download --sparse --offset $source_file_size --length 1 /path/to/source.file destination.file
- Every now and then (not always) it gets stuck waiting for the daemon to receive data (see backtrace below), but the daemon is not waiting for anything, it's just some weird race. We can try debugging it with wireshark later. That file ends with a hole.
But I do not have a fix for this one. Frankly, I have no idea what is going on. Looks to me like: a) the daemon reaches the end of the stream, but doesn't call virStreamFinish b) client reads all the incoming data from the stream, and after that finds incoming queue empty so it sends "gimme more data" packet to the daemon c) the I/O thread in the daemon has died already (it had read everything it was supposed to), so it closes the write end of the pipe d) the read end of the pipe is not added to the event loop since we are not expecting more data in the stream Frankly, I have no idea what is really going on or how to fix it. So if somebody else can take a look I'd appreciate it. Michal

When the I/O thread quits (e.g. due to an I/O error, lseek() error, whatever), any subsequent virFDStream API should return error too. Moreover, when invoking stream event callback, we must set the VIR_STREAM_EVENT_ERROR flag so that the callback knows something bad happened. Signed-off-by: Michal Privoznik <mprivozn@redhat.com> --- src/util/virfdstream.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/src/util/virfdstream.c b/src/util/virfdstream.c index 7ee58be13..cd24757e6 100644 --- a/src/util/virfdstream.c +++ b/src/util/virfdstream.c @@ -312,6 +312,9 @@ static void virFDStreamEvent(int watch ATTRIBUTE_UNUSED, return; } + if (fdst->threadErr) + events |= VIR_STREAM_EVENT_ERROR; + cb = fdst->cb; cbopaque = fdst->opaque; ff = fdst->ff; @@ -787,10 +790,10 @@ static int virFDStreamWrite(virStreamPtr st, const char *bytes, size_t nbytes) if (fdst->thread) { char *buf; - if (fdst->threadQuit) { + if (fdst->threadQuit || fdst->threadErr) { virReportSystemError(EBADF, "%s", _("cannot write to stream")); - return -1; + goto cleanup; } if (VIR_ALLOC(msg) < 0 || @@ -866,7 +869,7 @@ static int virFDStreamRead(virStreamPtr st, char *bytes, size_t nbytes) virFDStreamMsgPtr msg = NULL; while (!(msg = fdst->msg)) { - if (fdst->threadQuit) { + if (fdst->threadQuit || fdst->threadErr) { if (nbytes) { virReportSystemError(EBADF, "%s", _("stream is not open")); @@ -959,6 +962,9 @@ virFDStreamSendHole(virStreamPtr st, fdst->offset += length; } + if (fdst->threadErr) + goto cleanup; + if (fdst->thread) { /* Things are a bit complicated here. If FDStream is in a * read mode, then if the message at the queue head is @@ -1018,6 +1024,9 @@ virFDStreamInData(virStreamPtr st, virObjectLock(fdst); + if (fdst->threadErr) + goto cleanup; + if (fdst->thread) { virFDStreamMsgPtr msg; -- 2.13.0
participants (3)
-
Martin Kletzander
-
Michal Privoznik
-
Pino Toscano