On 28.11.2013 17:19, Daniel P. Berrange wrote:
On Thu, Nov 28, 2013 at 05:06:09PM +0100, Michal Privoznik wrote:
>
https://bugzilla.redhat.com/show_bug.cgi?id=1033061
>
> Since our transformation into virObject is not complete and we must do
> ref and unref ourselves there's a chance that we will get it wrong. That
> is, while one thread is doing unref and subsequent dispose another
> thread may come and do the ref & unref on stale pointer. This results in
> dispose being called twice (and possibly simultaneously). These kind of
> errors are hard to catch so we should at least throw an error into logs
> if such situation occurs. In fact, I've seen a stack trace showing this
> error had happen (obj = 0x7f4968018260):
>
> Thread 2 (Thread 0x7f498596b880 (LWP 13394)):
> [...]
> #4 0x00007f496ff44607 in ncf_close (ncf=0x7f496801e3d0) at netcf.c:101
> __PRETTY_FUNCTION__ = "ncf_close"
> #5 0x00007f4984f3f31b in virObjectUnref (anyobj=<optimized out>) at
util/virobject.c:262
> klass = 0x7f4968019020
> obj = 0x7f4968018260
> lastRef = true
> __func__ = "virObjectUnref"
> #6 0x00007f4970159785 in netcfStateCleanup () at
interface/interface_backend_netcf.c:109
> No locals.
> #7 0x00007f4984fc0e28 in virStateCleanup () at libvirt.c:883
> i = 6
> ret = 0
> #8 0x00007f49859b55fd in main (argc=<optimized out>, argv=<optimized
out>) at libvirtd.c:1549
> srv = 0x7f498696ed00
> remote_config_file = 0x0
> statuswrite = -1
> ret = <optimized out>
> pid_file_fd = 5
> pid_file = 0x0
> sock_file = 0x0
> sock_file_ro = 0x0
> timeout = -1
> verbose = 0
> godaemon = 0
> ipsock = 0
> config = 0x7f498696e760
> privileged = <optimized out>
> implicit_conf = <optimized out>
> run_dir = 0x0
> old_umask = <optimized out>
> opts = {{name = 0x7f49859e2b53 "verbose", has_arg = 0, flag =
0x7fff45057808, val = 118}, {name = 0x7f49859e2b5b "daemon", has_arg = 0, flag =
0x7fff4505780c, val = 100}, {name = 0x7f49859e2b62 "listen", has_arg = 0, flag =
0x7fff45057810, val = 108}, {name = 0x7f49859e2c9f "config", has_arg = 1, flag =
0x0, val = 102}, {name = 0x7f49859e2c00 "timeout", has_arg = 1, flag = 0x0, val
= 116}, {name = 0x7f49859e2b69 "pid-file", has_arg = 1, flag = 0x0, val = 112},
{name = 0x7f49859e2b72 "version", has_arg = 0, flag = 0x0, val = 86}, {name =
0x7f49859e2b7a "help", has_arg = 0, flag = 0x0, val = 104}, {name = 0x0, has_arg
= 0, flag = 0x0, val = 0}}
> __func__ = "main"
>
> Thread 1 (Thread 0x7f496d6a4700 (LWP 13405)):
> [...]
> #6 0x00007f496ff44607 in ncf_close (ncf=0x7f496801e3d0) at netcf.c:101
> __PRETTY_FUNCTION__ = "ncf_close"
> #7 0x00007f4984f3f31b in virObjectUnref (anyobj=<optimized out>) at
util/virobject.c:262
> klass = 0x7f4968019020
> obj = 0x7f4968018260
> lastRef = true
> __func__ = "virObjectUnref"
> #8 0x00007f4970157752 in netcfInterfaceClose (conn=0x7f49680a3260) at
interface/interface_backend_netcf.c:265
> No locals.
> #9 0x00007f4984fb7984 in virConnectDispose (obj=0x7f49680a3260) at datatypes.c:149
> conn = 0x7f49680a3260
> #10 0x00007f4984f3f31b in virObjectUnref (anyobj=anyobj@entry=0x7f49680a3260) at
util/virobject.c:262
> klass = 0x7f49681d6760
> obj = 0x7f49680a3260
> lastRef = true
> __func__ = "virObjectUnref"
> #11 0x00007f4984fc11bf in virConnectClose (conn=conn@entry=0x7f49680a3260) at
libvirt.c:1532
> __func__ = "virConnectClose"
> __FUNCTION__ = "virConnectClose"
> #12 0x00007f496eced281 in virLXCProcessAutostartAll (driver=0x7f49681ffaa0) at
lxc/lxc_process.c:1426
> conn = 0x7f49680a3260
> data = {driver = 0x7f49681ffaa0, conn = 0x7f49680a3260}
> #13 0x00007f4984fc0d21 in virStateInitialize (privileged=true,
callback=callback@entry=0x7f49859b7180 <daemonInhibitCallback>,
opaque=opaque@entry=0x7f498696ed00) at libvirt.c:864
> i = 8
> __func__ = "virStateInitialize"
> #14 0x00007f49859b71db in daemonRunStateInit (opaque=opaque@entry=0x7f498696ed00) at
libvirtd.c:906
> srv = 0x7f498696ed00
> sysident = 0x7f4968000ae0
> __func__ = "daemonRunStateInit"
> #15 0x00007f4984f4efe1 in virThreadHelper (data=<optimized out>) at
util/virthreadpthread.c:161
> args = 0x0
> local = {func = 0x7f49859b71a0 <daemonRunStateInit>, opaque =
0x7f498696ed00}
> #16 0x00007f4982a40de3 in start_thread (arg=0x7f496d6a4700) at pthread_create.c:308
> __res = <optimized out>
> pd = 0x7f496d6a4700
> now = <optimized out>
> unwind_buf = {cancel_jmp_buf = {{jmp_buf = {139953345021696,
7118305506502180663, 0, 139953345022400, 139953345021696, 140734351374104,
-7179978120810397897, -7180347023594422473}, mask_was_saved = 0}}, priv = {pad = {0x0,
0x0, 0x0, 0x0}, data = {prev = 0x0, cleanup = 0x0, canceltype = 0}}}
> not_first_call = <optimized out>
> pagesize_m1 = <optimized out>
> sp = <optimized out>
> freesize = <optimized out>
> #17 0x00007f498236716d in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:113
>
> Signed-off-by: Michal Privoznik <mprivozn(a)redhat.com>
> ---
> src/nwfilter/nwfilter_dhcpsnoop.c | 6 ++---
> src/util/viratomic.h | 53 +++++++++++++++++++++++++++++++--------
> src/util/virobject.c | 17 ++++++++++---
> 3 files changed, 58 insertions(+), 18 deletions(-)
>
> diff --git a/src/nwfilter/nwfilter_dhcpsnoop.c b/src/nwfilter/nwfilter_dhcpsnoop.c
> index e8fcfef..71380bb 100644
> --- a/src/nwfilter/nwfilter_dhcpsnoop.c
> +++ b/src/nwfilter/nwfilter_dhcpsnoop.c
> @@ -893,7 +893,7 @@ virNWFilterSnoopReqLeaseDel(virNWFilterSnoopReqPtr req,
> skip_instantiate:
> VIR_FREE(ipl);
>
> - virAtomicIntDecAndTest(&virNWFilterSnoopState.nLeases);
> + virAtomicIntDec(&virNWFilterSnoopState.nLeases);
>
> lease_not_found:
> VIR_FREE(ipstr);
> @@ -1169,7 +1169,7 @@ static void virNWFilterDHCPDecodeWorker(void *jobdata, void
*opaque)
> _("Instantiation of rules failed on "
> "interface '%s'"), req->ifname);
> }
> - virAtomicIntDecAndTest(job->qCtr);
> + virAtomicIntDec(job->qCtr);
> VIR_FREE(job);
> }
>
> @@ -1562,7 +1562,7 @@ exit:
> pcap_close(pcapConf[i].handle);
> }
>
> - virAtomicIntDecAndTest(&virNWFilterSnoopState.nThreads);
> + virAtomicIntDec(&virNWFilterSnoopState.nThreads);
>
> return;
> }
It seems these changes can be independant of this change.
Not really, since virAtomicIntDecAndTest is a macro now (or can be if
gcc extensions are used) then this while code will look like:
(virAtomicInDec(&var) == 0);
which makes my gcc complain.
> diff --git a/src/util/viratomic.h b/src/util/viratomic.h
> index 4d7f7e5..877900e 100644
> --- a/src/util/viratomic.h
> +++ b/src/util/viratomic.h
> @@ -68,6 +68,18 @@ VIR_STATIC int virAtomicIntInc(volatile int *atomic)
> ATTRIBUTE_NONNULL(1);
>
> /**
> + * virAtomicIntDec:
> + * Decrements the value of atomic by 1.
> + *
> + * Think of this operation as an atomic version of
> + * { *atomic -= 1; return *atomic == 0; }
> + *
> + * This call acts as a full compiler and hardware memory barrier.
> + */
> +VIR_STATIC int virAtomicIntDec(volatile int *atomic)
> + ATTRIBUTE_NONNULL(1);
> +
> +/**
> * virAtomicIntDecAndTest:
> * Decrements the value of atomic by 1.
> *
> @@ -75,6 +87,8 @@ VIR_STATIC int virAtomicIntInc(volatile int *atomic)
> * { *atomic -= 1; return *atomic == 0; }
> *
> * This call acts as a full compiler and hardware memory barrier.
> + * Returns true if the resulting decremented value is zero,
> + * false otherwise.
> */
> VIR_STATIC bool virAtomicIntDecAndTest(volatile int *atomic)
> ATTRIBUTE_NONNULL(1);
> @@ -176,12 +190,15 @@ VIR_STATIC unsigned int virAtomicIntXor(volatile unsigned int
*atomic,
> (void)(0 ? *(atomic) ^ *(atomic) : 0); \
> __sync_add_and_fetch((atomic), 1); \
> }))
> +# define virAtomicIntDec(atomic) \
> + (__extension__ ({ \
> + (void)verify_true(sizeof(*(atomic)) == sizeof(int)); \
> + (void)(0 ? *(atomic) ^ *(atomic) : 0); \
> + __sync_sub_and_fetch((atomic), 1); \
> + }))
> # define virAtomicIntDecAndTest(atomic) \
> - (__extension__ ({ \
> - (void)verify_true(sizeof(*(atomic)) == sizeof(int)); \
> - (void)(0 ? *(atomic) ^ *(atomic) : 0); \
> - __sync_fetch_and_sub((atomic), 1) == 1; \
> - }))
> + (virAtomicIntDec(atomic) == 0)
> +
> # define virAtomicIntCompareExchange(atomic, oldval, newval) \
> (__extension__ ({ \
> (void)verify_true(sizeof(*(atomic)) == sizeof(int)); \
> @@ -252,10 +269,16 @@ virAtomicIntInc(volatile int *atomic)
> return InterlockedIncrement((volatile LONG *)atomic);
> }
>
> +static inline int
> +virAtomicIntDec(volatile int *atomic)
> +{
> + return InterlockedDecrement((volatile LONG *)atomic);
> +}
> +
> static inline bool
> virAtomicIntDecAndTest(volatile int *atomic)
> {
> - return InterlockedDecrement((volatile LONG *)atomic) == 0;
> + return virAtomicIntDec(atomic) == 0;
> }
>
> static inline bool
> @@ -334,16 +357,22 @@ virAtomicIntInc(volatile int *atomic)
> return value;
> }
>
> -static inline bool
> -virAtomicIntDecAndTest(volatile int *atomic)
> +static inline int
> +virAtomicIntDec(volatile int *atomic)
> {
> - bool is_zero;
> + bool value;
>
> pthread_mutex_lock(&virAtomicLock);
> - is_zero = --(*atomic) == 0;
> + value = --(*atomic);
You're assigning an int to a bool here
> pthread_mutex_unlock(&virAtomicLock);
>
> - return is_zero;
> + return value;
And then turning a bool back into an int here
D'oh! I'll fix it and repost v2.