On 25.01.2023 19:49, Martin Kletzander wrote:
> On Fri, Jan 20, 2023 at 04:50:51PM +0600, Oleg Vasilev wrote:
>>
>>
>> On 09.01.2023 18:31, Martin Kletzander wrote:
>>> On Thu, Dec 15, 2022 at 01:25:49AM +0600, Oleg Vasilev wrote:
>>>> Before, logs from deleted machines have been piling up, since there
>>>> were
>>>> no garbage collection mechanism. Now, virtlogd can be configured to
>>>> periodically scan the log folder for orphan logs with no recent
>>>> modifications
>>>> and delete it.
>>>>
>>>> A single chain of recent and rotated logs is deleted in a single
>>>> transaction.
>>>>
>>>
>>> This seems very complicated for what this is trying to do. Couple of
>>> notes below.
>>
>> That's true, but I don't see any other simpler approach, which achieves
>> the same result. We don't want the chains to be deleted in parts, so the
>> only way I see is with two phases:
>> - First, we aggregate information about the state of the filesystem
>> - Then we do the deletion
>>
>> Do you have any suggestions this can be simplified.
>>
>
> What would be the issue with deleting the chains in parts?
For an alive log chain, we want to keep the rotated files as history.
That was actually the issue raised by you: :)
Ok, I misunderstood "in parts" then, I guess. Anyway, I'll have a look
at the new version later this week, sorry for the stalling.
"One more thing is that if larger logs are rotated, then this
would
remove the old ones even if they are kept as number of backups. That
seems like something that should be avoided (probably not even
configurable)."
>
>>>
>>>> Signed-off-by: Oleg Vasilev <oleg.vasilev(a)virtuozzo.com>
>>>> ---
>>>> po/POTFILES | 1 +
>>>> src/logging/log_cleaner.c | 276 ++++++++++++++++++++++++++++++++++++++
>>>> src/logging/log_cleaner.h | 29 ++++
>>>> src/logging/log_handler.h | 2 +
>>>> src/logging/meson.build | 1 +
>>>> 5 files changed, 309 insertions(+)
>>>> create mode 100644 src/logging/log_cleaner.c
>>>> create mode 100644 src/logging/log_cleaner.h
>>>>
>>>> diff --git a/po/POTFILES b/po/POTFILES
>>>> index 169e2a41dc..2fb6d18e27 100644
>>>> --- a/po/POTFILES
>>>> +++ b/po/POTFILES
>>>> @@ -123,6 +123,7 @@ src/locking/lock_driver_lockd.c
>>>> src/locking/lock_driver_sanlock.c
>>>> src/locking/lock_manager.c
>>>> src/locking/sanlock_helper.c
>>>> +src/logging/log_cleaner.c
>>>> src/logging/log_daemon.c
>>>> src/logging/log_daemon_dispatch.c
>>>> src/logging/log_handler.c
>>>> diff --git a/src/logging/log_cleaner.c b/src/logging/log_cleaner.c
>>>> new file mode 100644
>>>> index 0000000000..03de9c17ee
>>>> --- /dev/null
>>>> +++ b/src/logging/log_cleaner.c
>>>> @@ -0,0 +1,276 @@
>>>> +/*
>>>> + * log_cleaner.c: cleans obsolete log files
>>>> + *
>>>> + * Copyright (C) 2022 Virtuozzo International GmbH
>>>> + *
>>>> + * This library is free software; you can redistribute it and/or
>>>> + * modify it under the terms of the GNU Lesser General Public
>>>> + * License as published by the Free Software Foundation; either
>>>> + * version 2.1 of the License, or (at your option) any later version.
>>>> + *
>>>> + * This library is distributed in the hope that it will be useful,
>>>> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
>>>> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
>>>> + * Lesser General Public License for more details.
>>>> + *
>>>> + * You should have received a copy of the GNU Lesser General Public
>>>> + * License along with this library; If not, see
>>>> + * <
http://www.gnu.org/licenses/>.
>>>> + */
>>>> +
>>>> +#include <config.h>
>>>> +
>>>> +#include "log_cleaner.h"
>>>> +#include "log_handler.h"
>>>> +
>>>> +#include "virerror.h"
>>>> +#include "virobject.h"
>>>> +#include "virfile.h"
>>>> +#include "viralloc.h"
>>>> +#include "virlog.h"
>>>> +#include "virrotatingfile.h"
>>>> +#include "virstring.h"
>>>> +
>>>> +#define VIR_FROM_THIS VIR_FROM_LOGGING
>>>> +
>>>> +/* Cleanup log root (/var/log/libvirt) and all subfolders (e.g.
>>>> /var/log/libvirt/qemu) */
>>>> +#define CLEANER_LOG_DEPTH 1
>>>> +#define CLEANER_LOG_TIMEOUT_MS (24 * 3600 * 1000) /* One day */
>>>> +#define MAX_TIME ((time_t) G_MAXINT64)
>>>
>>> Unused
>>>
>>>> +
>>>> +static GRegex *log_regex;
>>>> +
>>>> +typedef struct _virLogCleanerChain virLogCleanerChain;
>>>> +struct _virLogCleanerChain {
>>>> + int rotated_max_index;
>>>> + time_t last_modified;
>>>> +};
>>>> +
>>>> +typedef struct _virLogCleanerData virLogCleanerData;
>>>> +struct _virLogCleanerData {
>>>> + virLogHandler *handler;
>>>> + time_t oldest_to_keep;
>>>> + GHashTable *chains;
>>>> +};
>>>> +
>>>> +static const char*
>>>> +virLogCleanerParseFilename(const char* path,
>>>> + int* rotated_index)
>>>> +{
>>>> + g_autoptr(GMatchInfo) matchInfo = NULL;
>>>> + g_autofree const char* rotated_index_str = NULL;
>>>> + g_autofree const char *clear_path = NULL;
>>>> + const char* chain_prefix = NULL;
>>>> +
>>>> + clear_path = realpath(path, NULL);
>>>> + if (!clear_path) {
>>>> + virReportSystemError(errno,
>>>> + _("Failed to resolve path
'%s'"),
>>>> + path);
>>>> + return NULL;
>>>> + }
>>>> +
>>>> + if (!g_regex_match(log_regex, path, 0, &matchInfo)) {
>>>> + return NULL;
>>>> + }
>>>> +
>>>> + chain_prefix = g_match_info_fetch(matchInfo, 1);
>>>> + if (!rotated_index)
>>>> + return chain_prefix;
>>>> +
>>>> + *rotated_index = 0;
>>>> + rotated_index_str = g_match_info_fetch(matchInfo, 2);
>>>
>>> You could just use 3 instead of 2 here and ...
>>>
>>>> +
>>>> + if (!rotated_index_str)
>>>> + return chain_prefix;
>>>> +
>>>> + if (virStrToLong_i(rotated_index_str+1, NULL, 10, rotated_index)
<
>>>> 0) {
>>>
>>> ... avoid this +1 here.
>>>
>>>> + virReportError(VIR_ERR_INTERNAL_ERROR,
>>>> + _("Failed to parse rotated index from
'%s'"),
>>>> + rotated_index_str);
>>>> + return NULL;
>>>> + }
>>>> + return chain_prefix;
>>>> +}
>>>> +
>>>> +static void
>>>> +virLogCleanerProcessFile(virLogCleanerData* data,
>>>> + const char* path,
>>>> + struct stat* sb)
>>>
>>> Inconsistency in the pointer declaration (asterisk position).
By the way, should we have it in linters somewhere?
>>>
>>>> +{
>>>> + int rotated_index = 0;
>>>> + g_autofree const char *chain_prefix = NULL;
>>>> + virLogCleanerChain *chain;
>>>> +
>>>> + if (!S_ISREG(sb->st_mode)) {
>>>> + return;
>>>> + }
>>>> +
>>>> + chain_prefix = virLogCleanerParseFilename(path,
&rotated_index);
>>>> +
>>>> + if (!chain_prefix) {
>>>> + return;
>>>> + }
>>>
>>> Here (and in bunch more places) the braces should be avoided.
>>>
>>>> +
>>>> + if (rotated_index > data->handler->config->max_backups)
{
>>>> + if (unlink(path) < 0) {
>>>> + virReportSystemError(errno, _("Unable to delete
%s"),
>>>> path);
>>>> + }
>>>> + return;
>>>> + }
>>>> +
>>>> + chain = g_hash_table_lookup(data->chains, chain_prefix);
>>>> +
>>>> + if (!chain) {
>>>> + chain = g_new0(virLogCleanerChain, 1);
>>>> + g_hash_table_insert(data->chains, (void *) chain_prefix,
>>>> chain);
>>>> + chain_prefix = NULL; /* Moved to hash table */
>>>
>>> You can use g_steal_pointer() to collapse these two line >>>
>>>> + }
>>>> +
>>>> + chain->last_modified = MAX(chain->last_modified,
sb->st_mtime);
>>>
>>> I think you meant MIN here.
>>
>> No, MAX should be used since we are looking for the most recent file. If
>> the chain was written to recent enough, we would skip it during the
>> deletion phase.
>>
>
> last_modified is set to G_MAXINT64, MAX with anything else int64 will
> always return G_MAXINT64. If you wanted to do MIN() here, then
That's the desired thing. For opened log files, we want to set
"the-most-recent-time" aka G_MAXINT64 to prevent its deletion. I've
added a comment explaining this.
There should be a v3 nearby.
Oleg
>
> Also I recently learned that MAX is from glibc, so setting it
> conditionally might be a better choice.
>
>>>
>>>> + chain->rotated_max_index = MAX(chain->rotated_max_index,
>>>> + rotated_index);
>>>> +}
>>>> +
>>>> +static GHashTable*
>>>> +virLogCleanerCreateTable(virLogHandler *handler)
>>>> +{
>>>> + /* (const char*) chain_prefix -> (virLogCleanerChain*) chain */
>>>> + GHashTable *chains = g_hash_table_new_full(g_str_hash,
>>>> g_str_equal,
>>>> + g_free, g_free);
>>>> + size_t i;
>>>> + virLogHandlerLogFile *file;
>>>> + const char *chain_prefix;
>>>> + virLogCleanerChain* chain;
>>>> + VIR_LOCK_GUARD lock = virObjectLockGuard(handler);
>>>> +
>>>
>>> You are locking the handler in an event loop thread, which feels a bit
>>> messy to me.
>>
>> But this is only for this function, which doesn't have any IO or
>> anything heavy.
>>
>
> That's why I did not say it's unacceptable, it just *feels* dirty. If
> this was done in a worker job, that would be a different thing. But I
> do not oppose to this completely.
>
>> The rest is clear to me, I will fix in a v3.
>>
>> Oleg
>>
>>>
>>>> + for (i = 0; i < handler->nfiles; i++) {
>>>> + file = handler->files[i];
>>>> + chain_prefix =
>>>> virLogCleanerParseFilename(virRotatingFileWriterGetPath(file->file),
>>>> + NULL);
>>>> + if (!chain_prefix) {
>>>> + continue;
>>>> + }
>>>> +
>>>> + chain = g_new0(virLogCleanerChain, 1);
>>>> + chain->last_modified = (time_t) G_MAXINT64;
>>>> + g_hash_table_insert(chains, (void *) chain_prefix, chain);
>>>> + }
>>>> +
>>>> + return chains;
>>>> +}
>>>> +
>>>> +static void
>>>> +virLogCleanerProcessFolder(virLogCleanerData *data,
>>>> + const char *path,
>>>> + int depth_left)
>>>> +{
>>>> + DIR *dir;
>>>> + struct dirent *entry;
>>>> + struct stat sb;
>>>> +
>>>> + if (virDirOpenIfExists(&dir, path) < 0)
>>>> + return;
>>>> +
>>>> + while (virDirRead(dir, &entry, path) > 0) {
>>>> + g_autofree char *newpath = NULL;
>>>> + if (STREQ(entry->d_name, "."))
>>>> + continue;
>>>> + if (STREQ(entry->d_name, ".."))
>>>> + continue;
>>>> +
>>>
>>> These two entries are skipped in virDirRead() already.
>>>
>>>> + newpath = g_strdup_printf("%s/%s", path,
entry->d_name);
>>>> +
>>>> + if (stat(newpath, &sb) < 0) {
>>>> + virReportSystemError(errno, _("Unable to stat
%s"),
>>>> newpath);
>>>> + continue;
>>>> + }
>>>> +
>>>> + if (S_ISDIR(sb.st_mode)) {
>>>> + if (depth_left > 0)
>>>> + virLogCleanerProcessFolder(data, newpath, depth_left
>>>> - 1);
>>>> + continue;
>>>> + }
>>>> +
>>>> + virLogCleanerProcessFile(data, newpath, &sb);
>>>> + }
>>>> +
>>>> + virDirClose(dir);
>>>> +}
>>>> +
>>>> +static void
>>>> +virLogCleanerChainCB(gpointer key,
>>>> + gpointer value,
>>>> + gpointer user_data)
>>>> +{
>>>> + const char* chain_prefix = key;
>>>> + virLogCleanerChain* chain = value;
>>>> + virLogCleanerData* data = user_data;
>>>> + size_t i;
>>>> +
>>>> + if (chain->last_modified > data->oldest_to_keep) {
>>>> + return;
>>>> + }
>>>> +
>>>> + for (i = 0; i <= chain->rotated_max_index; i++) {
>>>> + g_autofree char* path = NULL;
>>>> + if (i == 0) {
>>>> + path = g_strdup_printf("%s.log", chain_prefix);
>>>> + } else {
>>>> + path = g_strdup_printf("%s.log.%ld", chain_prefix,
i);
>>>> + }
>>>
>>> Here you're not even considering .log.0
>>>
>>>> + if (unlink(path) < 0) {
>>>
>>> You should also skip ENOENT be here.
>>>
>>>> + virReportSystemError(errno, _("Unable to delete
%s"),
>>>> path);
>>>
>>> Since you are running this in the event loop thread I'd prefer VIR_WARN
>>> for these kind of things.
>>