Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/rpm-software-management/createrepo_c.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Pavel Raiskup <praiskup@redhat.com> 2022-08-08 04:59:34 +0300
committer: amatej <matej.ales@seznam.cz> 2022-11-03 18:51:36 +0300
commit: 7b159f2f3479d6b979296b74b23ba80537dd899f (patch)
tree: 728f8e76b17e7f4322a946cf46b1105b67d4edd5
parent: 269bb3b4f6426082a560017a80b9b014180dbc0e (diff)
New (hidden) --delayed-dump option
Normally we dump (to XML and DB) directly from the pool of worker threads, in a streaming fashion: as soon as the metadata for a package is determined, it is written. The new option causes all the metadata to be loaded first (still in parallel) and then written in a single pass (by one thread for now). Since all the metadata (cr_Package) is then available in memory at the same time, we can do some additional processing and filtering before we eventually perform the dump. This behavior is optional because the delayed preparation of the XML chunks slows down the overall multi-threaded processing.
-rw-r--r-- src/cmd_parser.h 3
-rw-r--r-- src/createrepo_c.c 35
-rw-r--r-- src/dumper_thread.c 76
-rw-r--r-- src/dumper_thread.h 8
4 files changed, 114 insertions, 8 deletions
diff --git a/src/cmd_parser.h b/src/cmd_parser.h
index 03cfcf0..3133741 100644
--- a/src/cmd_parser.h
+++ b/src/cmd_parser.h
@@ -139,6 +139,9 @@ struct CmdOptions {
GSList *modulemd_metadata; /*!< paths to all modulemd metadata */
gboolean recycle_pkglist;
+ gboolean delayed_dump; /*!< Load _all_ the packages (parallel workers)
+ first, and then dump the database. This
+ allows additional package filtering. */
};
/**
diff --git a/src/createrepo_c.c b/src/createrepo_c.c
index ad99ff6..2c2886f 100644
--- a/src/createrepo_c.c
+++ b/src/createrepo_c.c
@@ -779,6 +779,11 @@ main(int argc, char **argv)
g_debug("Package count: %ld", task_count);
g_message("Directory walk done - %ld packages", task_count);
+ user_data.task_count = task_count;
+ if (cmd_options->delayed_dump)
+ // call this when we know the expected task_count
+ cr_delayed_dump_set(&user_data);
+
if (cmd_options->update) {
if (old_metadata)
g_debug("Old metadata already loaded.");
@@ -1058,9 +1063,11 @@ main(int argc, char **argv)
// Set number of packages
g_debug("Setting number of packages");
- cr_xmlfile_set_num_of_pkgs(pri_cr_file, task_count, NULL);
- cr_xmlfile_set_num_of_pkgs(fil_cr_file, task_count, NULL);
- cr_xmlfile_set_num_of_pkgs(oth_cr_file, task_count, NULL);
+ if (!cmd_options->delayed_dump) {
+ cr_xmlfile_set_num_of_pkgs(pri_cr_file, task_count, NULL);
+ cr_xmlfile_set_num_of_pkgs(fil_cr_file, task_count, NULL);
+ cr_xmlfile_set_num_of_pkgs(oth_cr_file, task_count, NULL);
+ }
// Open sqlite databases
gchar *pri_db_filename = NULL;
@@ -1273,9 +1280,11 @@ main(int argc, char **argv)
// Set number of packages
g_debug("Setting number of packages");
- cr_xmlfile_set_num_of_pkgs(pri_cr_zck, task_count, NULL);
- cr_xmlfile_set_num_of_pkgs(fil_cr_zck, task_count, NULL);
- cr_xmlfile_set_num_of_pkgs(oth_cr_zck, task_count, NULL);
+ if (!cmd_options->delayed_dump) {
+ cr_xmlfile_set_num_of_pkgs(pri_cr_zck, task_count, NULL);
+ cr_xmlfile_set_num_of_pkgs(fil_cr_zck, task_count, NULL);
+ cr_xmlfile_set_num_of_pkgs(oth_cr_zck, task_count, NULL);
+ }
}
// Thread pool - User data initialization
@@ -1299,7 +1308,6 @@ main(int argc, char **argv)
user_data.checksum_cachedir = cmd_options->checksum_cachedir;
user_data.skip_symlinks = cmd_options->skip_symlinks;
user_data.repodir_name_len = strlen(in_dir);
- user_data.task_count = task_count;
user_data.package_count = 0;
user_data.nevra_table = g_hash_table_new(g_str_hash, g_str_equal);
user_data.skip_stat = cmd_options->skip_stat;
@@ -1364,6 +1372,19 @@ main(int argc, char **argv)
}
g_hash_table_destroy(user_data.nevra_table);
+ if (cmd_options->delayed_dump) {
+ // Finally dump the delayed (new) metadata! (no threading for now)
+ cr_xmlfile_set_num_of_pkgs(pri_cr_file, task_count, NULL);
+ cr_xmlfile_set_num_of_pkgs(fil_cr_file, task_count, NULL);
+ cr_xmlfile_set_num_of_pkgs(oth_cr_file, task_count, NULL);
+ if (cmd_options->zck_compression) {
+ cr_xmlfile_set_num_of_pkgs(pri_cr_zck, task_count, NULL);
+ cr_xmlfile_set_num_of_pkgs(fil_cr_zck, task_count, NULL);
+ cr_xmlfile_set_num_of_pkgs(oth_cr_zck, task_count, NULL);
+ }
+ cr_delayed_dump_run(&user_data);
+ }
+
// if there were any errors, exit nonzero
if ( user_data.had_errors ) {
exit_val = 2;
diff --git a/src/dumper_thread.c b/src/dumper_thread.c
index 777f7e4..21f545e 100644
--- a/src/dumper_thread.c
+++ b/src/dumper_thread.c
@@ -233,6 +233,57 @@ write_pkg(long id,
g_mutex_unlock(&(udata->mutex_oth));
}
+
+// One slot of UserData.delayed_write; slots are indexed by task id.
+struct DelayedTask {
+ cr_Package *pkg; // package to dump; NULL when the worker failed to load it
+ gboolean clean; // TRUE when the delayed run must cr_package_free() pkg
+};
+
+
+/**
+ * Prepare UserData for the delayed dump by creating udata->delayed_write.
+ *
+ * The array gets one struct DelayedTask slot per expected task, so it has to
+ * be called once udata->task_count is known.
+ *
+ * @param user_data     Pointer to struct UserData.
+ */
+void
+cr_delayed_dump_set(gpointer user_data)
+{
+    struct UserData *udata = (struct UserData *) user_data;
+    guint slots = (guint) udata->task_count;
+
+    udata->delayed_write = g_array_sized_new(TRUE, TRUE,
+                                             sizeof(struct DelayedTask),
+                                             slots);
+
+    // g_array_sized_new() only preallocates; the array length is still 0.
+    // Setting the size makes len(delayed_write) == task_count and, because
+    // the array was created with clear_ == TRUE, zeroes every slot, so a
+    // slot no worker has filled reads as "no package" (pkg == NULL).
+    g_array_set_size(udata->delayed_write, slots);
+}
+
+
+/**
+ * Perform the delayed dump: walk udata->delayed_write in task-id order, dump
+ * each stored package to XML and hand the result to write_pkg().
+ *
+ * Packages that failed to load (pkg == NULL) are skipped.  A failing XML
+ * dump is logged and recorded in udata->had_errors.  Packages flagged
+ * `clean` are freed here.
+ *
+ * @param user_data     Pointer to struct UserData.
+ */
+void
+cr_delayed_dump_run(gpointer user_data)
+{
+    GError *tmp_err = NULL;
+    struct UserData *udata = (struct UserData *) user_data;
+    long stop = udata->task_count;
+
+    g_debug("Performing the delayed metadata dump");
+    for (long id = 0; id < stop; id++) {
+        struct DelayedTask dtask = g_array_index(udata->delayed_write,
+                                                 struct DelayedTask, id);
+        if (!dtask.pkg) {
+            // The dumper pool failed to load this package.  Skip only this
+            // entry: the remaining packages still have to be dumped (and
+            // freed).  Advance the write counters for the skipped id the
+            // same way the error path of cr_dumper_thread() does.
+            wait_for_incremented_ids(id, udata);
+            continue;
+        }
+
+        struct cr_XmlStruct res = cr_xml_dump(dtask.pkg, &tmp_err);
+        if (tmp_err) {
+            g_critical("Cannot dump XML for %s (%s): %s",
+                       dtask.pkg->name, dtask.pkg->pkgId, tmp_err->message);
+            udata->had_errors = TRUE;
+            g_clear_error(&tmp_err);
+            // Nothing gets written for this id, so account for it here;
+            // otherwise the following ids would never get their turn.
+            wait_for_incremented_ids(id, udata);
+        } else {
+            write_pkg(id, res, dtask.pkg, udata);
+        }
+
+        if (dtask.clean) {
+            cr_package_free(dtask.pkg);
+        }
+        g_free(res.primary);
+        g_free(res.filelists);
+        g_free(res.other);
+    }
+}
+
+
static char *
get_checksum(const char *filename,
cr_ChecksumType type,
@@ -425,6 +476,19 @@ cr_dumper_thread(gpointer data, gpointer user_data)
struct UserData *udata = (struct UserData *) user_data;
struct PoolTask *task = (struct PoolTask *) data;
+ struct DelayedTask *dtask = NULL;
+ if (udata->delayed_write) {
+ // Even if we later find out that this is an invalid package, we
+ // have to set up its delayed task: we have to ensure that
+ // len(delayed_write) == udata->task_count and that all items
+ // are processed in the delayed run.
+ dtask = &g_array_index(udata->delayed_write,
+ struct DelayedTask,
+ task->id);
+ dtask->pkg = NULL;
+ dtask->clean = FALSE;
+ }
+
// get location_href without leading part of path (path to repo)
// including '/' char
_cleanup_free_ gchar *location_href = NULL;
@@ -580,6 +644,16 @@ cr_dumper_thread(gpointer data, gpointer user_data)
g_array_append_val(pkg_locations, location);
g_mutex_unlock(&(udata->mutex_nevra_table));
+ if (dtask) {
+ dtask->pkg = pkg;
+ dtask->clean = pkg_new ? TRUE : FALSE;
+ g_free(task->full_path);
+ g_free(task->filename);
+ g_free(task->path);
+ g_free(task);
+ return;
+ }
+
// Pre-calculate the XML data aside any critical section, and early enough
// so we can put it into the buffer (so buffered single-threaded write later
// is faster).
@@ -636,7 +710,7 @@ task_cleanup:
if (pkg_new)
cr_package_free(pkg_new);
- if (udata->id_pri <= task->id) {
+ if (!dtask && udata->id_pri <= task->id) {
// An error was encountered and we have to wait to increment counters
wait_for_incremented_ids(task->id, udata);
}
diff --git a/src/dumper_thread.h b/src/dumper_thread.h
index fb815cb..822dcf1 100644
--- a/src/dumper_thread.h
+++ b/src/dumper_thread.h
@@ -109,12 +109,20 @@ struct UserData {
FILE *output_pkg_list; // File where a list of read packages is written
GMutex mutex_output_pkg_list; // Mutex for output_pkg_list file
+ GArray *delayed_write; // Dump these files once all packages are loaded
};
void
cr_dumper_thread(gpointer data, gpointer user_data);
+
+/** Prepare the delayed dump: allocate the delayed_write array of UserData
+ * with room for task_count entries. Call it once the expected task_count
+ * is stored in UserData.
+ * @param user_data     Pointer to struct UserData.
+ */
+void
+cr_delayed_dump_set(gpointer user_data);
+
+/** Perform the delayed dump: go through the packages stored in the
+ * delayed_write array of UserData, dump each of them to XML and write
+ * the result. Sets had_errors in UserData when the XML dump of a package
+ * fails.
+ * @param user_data     Pointer to struct UserData.
+ */
+void
+cr_delayed_dump_run(gpointer user_data);
+
/** @} */
#ifdef __cplusplus