diff options
author | Daniel Alley <dalley@redhat.com> | 2022-02-22 23:29:58 +0300 |
---|---|---|
committer | amatej <matej.ales@seznam.cz> | 2022-03-17 15:31:32 +0300 |
commit | 869b789f0f4eedfab561f7d397a578cbc5966ed9 (patch) | |
tree | 79f985e663583b2454199df2803b8bfdc87b10dd | |
parent | d0e0811dea2813cb1837cce85177ac18aef933d7 (diff) |
Add a streaming parsing API that is user-controllable.
Allow the user to control the parsing loop ("external" iteration).
This provides more flexibility in how packages are handled, accommodating
lazy / pull-based workflows.
-rwxr-xr-x | examples/python/repodata_parsing.py | 108 | ||||
-rw-r--r-- | src/python/createrepo_c/__init__.py | 5 | ||||
-rw-r--r-- | src/python/createrepo_cmodule.c | 6 | ||||
-rw-r--r-- | src/python/package-py.c | 2 | ||||
-rw-r--r-- | src/python/xml_parser-py.c | 229 | ||||
-rw-r--r-- | src/python/xml_parser-py.h | 7 | ||||
-rw-r--r-- | src/xml_parser.h | 15 | ||||
-rw-r--r-- | src/xml_parser_main_metadata_together.c | 306 | ||||
-rw-r--r-- | tests/fixtures.h | 3 | ||||
-rw-r--r-- | tests/python/tests/fixtures.py | 5 | ||||
-rw-r--r-- | tests/python/tests/test_xml_parser.py | 211 | ||||
-rw-r--r-- | tests/test_xml_parser_main_metadata_together.c | 131 | ||||
-rw-r--r-- | tests/testdata/repo_with_duplicate_packages/repodata/filelists.xml.gz | bin | 0 -> 341 bytes | |||
-rw-r--r-- | tests/testdata/repo_with_duplicate_packages/repodata/other.xml.gz | bin | 0 -> 405 bytes | |||
-rw-r--r-- | tests/testdata/repo_with_duplicate_packages/repodata/primary.xml.gz | bin | 0 -> 1017 bytes | |||
-rw-r--r-- | tests/testdata/repo_with_duplicate_packages/repodata/repomd.xml | 28 |
16 files changed, 896 insertions, 160 deletions
diff --git a/examples/python/repodata_parsing.py b/examples/python/repodata_parsing.py index 518962a..e07d7ae 100755 --- a/examples/python/repodata_parsing.py +++ b/examples/python/repodata_parsing.py @@ -77,18 +77,63 @@ def print_package_info(pkg): print("Changelogs:") print_changelogs(pkg.changelogs) -def first_method(): - """Use of this method is discouraged.""" + +def streaming_iterator(): + """Parsing main metadata types (primary, filelists, other) at the same time. + This approach significantly reduces memory footprint because we don't need + to keep all the packages in memory, user can handle them one by one. + + This is the most flexible method, and the recommended one if you need all of the + RPM metadata. If you only need to parse one file it might not be the most efficient. + """ + + def warningcb(warning_type, message): + print("PARSER WARNING: %s" % message) + return True + + repomd = cr.Repomd() + cr.xml_parse_repomd(os.path.join(REPO_PATH, "repodata/repomd.xml"), repomd, warningcb) + + primary_xml_path = None + filelists_xml_path = None + other_xml_path = None + for record in repomd.records: + if record.type == "primary": + primary_xml_path = os.path.join(REPO_PATH, record.location_href) + elif record.type == "filelists": + filelists_xml_path = os.path.join(REPO_PATH, record.location_href) + elif record.type == "other": + other_xml_path = os.path.join(REPO_PATH, record.location_href) + + # + # Main XML metadata parsing (primary, filelists, other) + # + package_iterator = cr.PackageIterator(primary_path=primary_xml_path, + filelists_path=filelists_xml_path, + other_path=other_xml_path, + warningcb=warningcb) + + for pkg in package_iterator: + # Called when whole package entry from all 3 metadata xml files is parsed + print_package_info(pkg) + + +def oneshot(): + """Parse all repo metadata for a given repo path. + + Use of this method is discouraged. 
+ """ md = cr.Metadata() md.locate_and_load_xml(REPO_PATH) for key in md.keys(): pkg = md.get(key) print_package_info(pkg) -def second_method(): - """Preferred method for repodata parsing. - Important callbacks for repodata parsing: +def oneshot_callback(): + """Parse one file at a time into a set of packages. + + Use of this method is discouraged. newpkgcb -------- @@ -211,58 +256,17 @@ def second_method(): for pkg in packages.values(): print_package_info(pkg) -def third_method(): - """Parsing main metadata types (primary, filelists, other) at the same time. - This approach significantly reduces memory footprint because we don't need - to keep all the packages in memory, user can handle them one by one. - - The API reflects xml_parse_primary/filelists/other except that it handles - all of them at the same time. - - """ - def warningcb(warning_type, message): - print("PARSER WARNING: %s" % message) - return True - - repomd = cr.Repomd() - cr.xml_parse_repomd(os.path.join(REPO_PATH, "repodata/repomd.xml"), repomd, warningcb) - - primary_xml_path = None - filelists_xml_path = None - other_xml_path = None - for record in repomd.records: - if record.type == "primary": - primary_xml_path = os.path.join(REPO_PATH, record.location_href) - elif record.type == "filelists": - filelists_xml_path = os.path.join(REPO_PATH, record.location_href) - elif record.type == "other": - other_xml_path = os.path.join(REPO_PATH, record.location_href) - - # - # Main XML metadata parsing (primary, filelists, other) - # - - def pkgcb(pkg): - # Called when whole package entry from all 3 metadata xml files is parsed - print_package_info(pkg) - - cr.xml_parse_main_metadata_together(primary_xml_path, - filelists_xml_path, - other_xml_path, - None, - pkgcb, - warningcb) if __name__ == "__main__": - print('"All in one shot" method:') - first_method() + print("Streaming iterator based method:") + streaming_iterator() print() - print("Callback based method:") - second_method() + print('"All in one 
shot" method:') + oneshot() print() - print("Streaming callback based method:") - third_method() + print("Callback based method:") + oneshot_callback() diff --git a/src/python/createrepo_c/__init__.py b/src/python/createrepo_c/__init__.py index 5c3fc63..440e559 100644 --- a/src/python/createrepo_c/__init__.py +++ b/src/python/createrepo_c/__init__.py @@ -367,6 +367,11 @@ compression_suffix = _createrepo_c.compression_suffix detect_compression = _createrepo_c.detect_compression compression_type = _createrepo_c.compression_type +class PackageIterator(_createrepo_c.PkgIterator): + def __init__(self, primary_path, filelists_path, other_path, newpkgcb=None, warningcb=None): + """Parse completed packages one at a time.""" + _createrepo_c.PkgIterator.__init__( + self, primary_path, filelists_path, other_path, newpkgcb, warningcb) # If we have been built as a Python package, e.g. "setup.py", this is where the binaries # will be located. diff --git a/src/python/createrepo_cmodule.c b/src/python/createrepo_cmodule.c index c0b9200..fd6e7ef 100644 --- a/src/python/createrepo_cmodule.c +++ b/src/python/createrepo_cmodule.c @@ -217,6 +217,12 @@ PyInit__createrepo_c(void) Py_INCREF(&XmlFile_Type); PyModule_AddObject(m, "XmlFile", (PyObject *)&XmlFile_Type); + /* _createrepo_c.PkgIterator */ + if (PyType_Ready(&PkgIterator_Type) < 0) + return NULL; + Py_INCREF(&PkgIterator_Type); + PyModule_AddObject(m, "PkgIterator", (PyObject *)&PkgIterator_Type); + /* Createrepo init */ cr_xml_dump_init(); diff --git a/src/python/package-py.c b/src/python/package-py.c index 4d455d8..316f3cc 100644 --- a/src/python/package-py.c +++ b/src/python/package-py.c @@ -579,7 +579,7 @@ PyTypeObject Package_Type = { .tp_basicsize = sizeof(_PackageObject), .tp_dealloc = (destructor) package_dealloc, .tp_repr = (reprfunc) package_repr, - .tp_str = (reprfunc)package_str, + .tp_str = (reprfunc) package_str, .tp_flags = Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, .tp_doc = package_init__doc__, .tp_iter = 
PyObject_SelfIter, diff --git a/src/python/xml_parser-py.c b/src/python/xml_parser-py.c index dba9eb5..d9f05f9 100644 --- a/src/python/xml_parser-py.c +++ b/src/python/xml_parser-py.c @@ -790,3 +790,232 @@ py_xml_parse_main_metadata_together(G_GNUC_UNUSED PyObject *self, PyObject *args Py_RETURN_NONE; } + +typedef struct { + PyObject_HEAD + cr_PkgIterator *pkg_iterator; + CbData *cbdata; +} _PkgIteratorObject; + +cr_PkgIterator * +PkgIterator_FromPyObject(PyObject *o) +{ + if (!PkgIteratorObject_Check(o)) { + PyErr_SetString(PyExc_TypeError, "Expected a createrepo_c.PkgIterator object."); + return NULL; + } + return ((_PkgIteratorObject *)o)->pkg_iterator; +} + +PyObject * +Object_FromPkgIterator(cr_PkgIterator *pkg_iterator) +{ + if (!pkg_iterator) { + PyErr_SetString(PyExc_ValueError, "Expected a cr_PkgIterator pointer not NULL."); + return NULL; + } + + PyObject *py_pkg_iterator = PyObject_CallObject((PyObject *)&PkgIterator_Type, NULL); + ((_PkgIteratorObject *)py_pkg_iterator)->pkg_iterator = pkg_iterator; + return py_pkg_iterator; +} + +static int +check_PkgIteratorStatus(const _PkgIteratorObject *self) +{ + assert(self != NULL); + assert(PkgIteratorObject_Check(self)); + if (self->pkg_iterator == NULL) { + PyErr_SetString(CrErr_Exception, "Improper createrepo_c PkgIterator object."); + return -1; + } + return 0; +} + +/* Function on the type */ + +static PyObject * +pkg_iterator_new(PyTypeObject *type, + G_GNUC_UNUSED PyObject *args, + G_GNUC_UNUSED PyObject *kwds) +{ + _PkgIteratorObject *self = (_PkgIteratorObject *)type->tp_alloc(type, 0); + if (self) { + self->pkg_iterator = NULL; + self->cbdata = g_malloc0(sizeof(CbData)); + } + return (PyObject *)self; +} + +PyDoc_STRVAR(pkg_iterator_init__doc__, + "PkgIterator object\n\n" + ".. 
method:: __init__()\n\n" + " Default constructor\n"); + +static int +pkg_iterator_init(_PkgIteratorObject *self, PyObject *args, PyObject *kwargs) +{ + char *primary_path; + char *filelists_path; + char *other_path; + PyObject *py_newpkgcb, *py_warningcb; + GError *tmp_err = NULL; + static char *kwlist[] = {"primary", "filelists", "other", "newpkgcb", + "warningcb", NULL}; + + if (!PyArg_ParseTupleAndKeywords(args, kwargs, "sssOO:pkg_iterator_init", kwlist, + &primary_path, &filelists_path, &other_path, &py_newpkgcb, + &py_warningcb)) { + return -1; + } + + if (!primary_path || !filelists_path || !other_path) { + PyErr_SetString(PyExc_TypeError, "file paths must be provided"); + return -1; + } + + if (!PyCallable_Check(py_newpkgcb) && py_newpkgcb != Py_None) { + PyErr_SetString(PyExc_TypeError, "newpkgcb must be callable or None"); + return -1; + } + + if (!PyCallable_Check(py_warningcb) && py_warningcb != Py_None) { + PyErr_SetString(PyExc_TypeError, "warningcb must be callable or None"); + return -1; + } + + if (self->pkg_iterator) // reinitialization by __init__() + cr_PkgIterator_free(self->pkg_iterator, &tmp_err); + if (tmp_err) { + nice_exception(&tmp_err, NULL); + return -1; + } + + Py_XINCREF(py_newpkgcb); + Py_XINCREF(py_warningcb); + + cr_XmlParserNewPkgCb ptr_c_newpkgcb = NULL; + cr_XmlParserWarningCb ptr_c_warningcb = NULL; + + if (py_newpkgcb != Py_None) + ptr_c_newpkgcb = c_newpkgcb; + if (py_warningcb != Py_None) + ptr_c_warningcb = c_warningcb; + + self->cbdata->py_newpkgcb = py_newpkgcb; + self->cbdata->py_pkgcb = NULL; + self->cbdata->py_warningcb = py_warningcb; + self->cbdata->py_pkgs = PyDict_New(); + + self->pkg_iterator = cr_PkgIterator_new( + primary_path, filelists_path, other_path, ptr_c_newpkgcb, self->cbdata, ptr_c_warningcb, self->cbdata, &tmp_err); + + if (tmp_err) { + nice_exception(&tmp_err, NULL); + return -1; + } + + if (self->pkg_iterator == NULL) { + PyErr_SetString(CrErr_Exception, "PkgIterator initialization failed"); + return 
-1; + } + + return 0; +} + +static void +pkg_iterator_dealloc(_PkgIteratorObject *self) +{ + GError *tmp_err; + if (self->pkg_iterator) { + cr_PkgIterator_free(self->pkg_iterator, &tmp_err); + } + if (self->cbdata) { + Py_XDECREF(self->cbdata->py_newpkgcb); + Py_XDECREF(self->cbdata->py_warningcb); + Py_XDECREF(self->cbdata->py_pkgs); + + free(self->cbdata); + } + + Py_TYPE(self)->tp_free(self); +} + +static PyObject * +pkg_iterator_next_package(_PkgIteratorObject *self, G_GNUC_UNUSED void *nothing) +{ + cr_Package *pkg; + GError *tmp_err = NULL; + + if (check_PkgIteratorStatus(self)) { + return NULL; + } + pkg = cr_PkgIterator_parse_next(self->pkg_iterator, &tmp_err); + if (tmp_err) { + cr_package_free(pkg); + nice_exception(&tmp_err, NULL); + return NULL; + } + + if (!pkg) { + assert(cr_PkgIterator_is_finished(self->pkg_iterator)); + PyErr_SetNone(PyExc_StopIteration); + return NULL; + } + + PyObject *keyFromPtr = PyLong_FromVoidPtr(pkg); + PyObject *py_pkg = PyDict_GetItem(self->cbdata->py_pkgs, keyFromPtr); + if (py_pkg) { + // Remove pkg from PyDict but keep one reference so its not freed if the + // user doesn't have any references to the package + Py_XINCREF(py_pkg); + PyDict_DelItem(self->cbdata->py_pkgs, keyFromPtr); + } else { + // The package was not provided by user in c_newpkgcb, + // create new python package object + py_pkg = Object_FromPackage(pkg, 1); + } + Py_DECREF(keyFromPtr); + return py_pkg; + +} + +static PyObject * +pkg_iterator_is_finished(_PkgIteratorObject *self, G_GNUC_UNUSED void *nothing) { + if (check_PkgIteratorStatus(self)) + return NULL; + + if (cr_PkgIterator_is_finished (self->pkg_iterator)) { + Py_RETURN_TRUE; + } else { + Py_RETURN_FALSE; + } +} + +PyDoc_STRVAR(pkg_iterator_is_finished__doc__, + "Whether the package iterator has been consumed. \n\n" + ".. 
method:: is_finished()\n\n" + " Whether the iterator is consumed.\n"); + +static struct PyMethodDef pkg_iterator_methods[] = { + {"is_finished", (PyCFunction) pkg_iterator_is_finished, METH_NOARGS, pkg_iterator_is_finished__doc__}, + {NULL, NULL, 0, NULL} /* sentinel */ +}; + +/* Object */ + +PyTypeObject PkgIterator_Type = { + PyVarObject_HEAD_INIT(NULL, 0) + .tp_name = "createrepo_c.PkgIterator", + .tp_basicsize = sizeof(_PkgIteratorObject), + .tp_dealloc = (destructor) pkg_iterator_dealloc, + .tp_repr = 0, // (reprfunc) pkg_iterator_repr, + .tp_str = 0, //(reprfunc) pkg_iterator_str, + .tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, + .tp_doc = pkg_iterator_init__doc__, + .tp_iter = PyObject_SelfIter, + .tp_iternext = pkg_iterator_next_package, + .tp_methods = pkg_iterator_methods, + .tp_init = (initproc) pkg_iterator_init, + .tp_new = pkg_iterator_new, +}; diff --git a/src/python/xml_parser-py.h b/src/python/xml_parser-py.h index 3260005..df5c620 100644 --- a/src/python/xml_parser-py.h +++ b/src/python/xml_parser-py.h @@ -72,4 +72,11 @@ PyDoc_STRVAR(xml_parse_main_metadata_together__doc__, PyObject *py_xml_parse_main_metadata_together(PyObject *self, PyObject *args, PyObject *kwargs); +extern PyTypeObject PkgIterator_Type; + +#define PkgIteratorObject_Check(o) PyObject_TypeCheck(o, &PkgIterator_Type) + +PyObject *Object_FromPkgIterator(cr_PkgIterator *iter); +cr_PkgIterator *PkgIterator_FromPyObject(PyObject *o); + #endif diff --git a/src/xml_parser.h b/src/xml_parser.h index a81ce65..6e1e3f1 100644 --- a/src/xml_parser.h +++ b/src/xml_parser.h @@ -316,6 +316,21 @@ cr_xml_parse_main_metadata_together(const char *primary_path, void *warningcb_data, GError **err); +typedef struct _cr_PkgIterator cr_PkgIterator; + +cr_PkgIterator * +cr_PkgIterator_new(const char *primary_path, + const char *filelists_path, + const char *other_path, + cr_XmlParserNewPkgCb newpkgcb, + void *newpkgcb_data, + cr_XmlParserWarningCb warningcb, + void *warningcb_data, + GError 
**err); +cr_Package* cr_PkgIterator_parse_next(cr_PkgIterator *iter, GError **err); +void cr_PkgIterator_free(cr_PkgIterator *iter, GError **err); +gboolean cr_PkgIterator_is_finished(cr_PkgIterator *iter); + /** @} */ #ifdef __cplusplus diff --git a/src/xml_parser_main_metadata_together.c b/src/xml_parser_main_metadata_together.c index ef8212d..cc3f221 100644 --- a/src/xml_parser_main_metadata_together.c +++ b/src/xml_parser_main_metadata_together.c @@ -35,14 +35,37 @@ typedef struct { int in_progress_count_primary; int in_progress_count_filelists; int in_progress_count_other; + GQueue *finished_pkgs_queue; cr_XmlParserNewPkgCb newpkgcb; // newpkgcb passed in from user void *newpkgcb_data;// newpkgcb data passed in from user cr_XmlParserPkgCb pkgcb; // pkgcb passed in from user void *pkgcb_data; // pkgcb data passed in from user } cr_CbData; +struct _cr_PkgIterator { + CR_FILE *primary_f; + CR_FILE *filelists_f; + CR_FILE *other_f; + + const char *primary_path; + const char *filelists_path; + const char *other_path; + + void *primary_pd; + void *filelists_pd; + void *other_pd; + + GError *tmp_err; + + gboolean primary_is_done; + gboolean filelists_is_done; + gboolean other_is_done; + + void *cbdata; +}; + static int -call_user_callback_if_package_finished(cr_Package *pkg, cr_CbData *cb_data, GError **err) +queue_package_if_finished(cr_Package *pkg, cr_CbData *cb_data, GError **err) { if (pkg && (pkg->loadingflags & CR_PACKAGE_LOADED_PRI) && (pkg->loadingflags & CR_PACKAGE_LOADED_OTH) && (pkg->loadingflags & CR_PACKAGE_LOADED_FIL)) @@ -55,19 +78,7 @@ call_user_callback_if_package_finished(cr_Package *pkg, cr_CbData *cb_data, GErr cb_data->in_progress_count_filelists--; cb_data->in_progress_count_other--; - // call user package callback - GError *tmp_err = NULL; - if (cb_data->pkgcb && cb_data->pkgcb(pkg, cb_data->pkgcb_data, &tmp_err)) { - if (tmp_err) - g_propagate_prefixed_error(err, tmp_err, "Parsing interrupted: "); - else - g_set_error(err, ERR_DOMAIN, 
CRE_CBINTERRUPTED, "Parsing interrupted"); - return CR_CB_RET_ERR; - } else { - // If callback return CRE_OK but it simultaneously set - // the tmp_err then it's a programming error. - assert(tmp_err == NULL); - }; + g_queue_push_tail(cb_data->finished_pkgs_queue, pkg); } return CR_CB_RET_OK; } @@ -199,7 +210,7 @@ pkgcb_filelists(cr_Package *pkg, void *cbdata, G_GNUC_UNUSED GError **err) cr_CbData *cb_data = cbdata; cb_data->in_progress_count_filelists++; pkg->loadingflags |= CR_PACKAGE_LOADED_FIL; - return call_user_callback_if_package_finished(pkg, cb_data, err); + return queue_package_if_finished(pkg, cb_data, err); } static int @@ -208,7 +219,7 @@ pkgcb_other(cr_Package *pkg, void *cbdata, G_GNUC_UNUSED GError **err) cr_CbData *cb_data = cbdata; cb_data->in_progress_count_other++; pkg->loadingflags |= CR_PACKAGE_LOADED_OTH; - return call_user_callback_if_package_finished(pkg, cb_data, err); + return queue_package_if_finished(pkg, cb_data, err); } static int @@ -271,7 +282,7 @@ pkgcb_primary(cr_Package *pkg, void *cbdata, G_GNUC_UNUSED GError **err) pkg->loadingflags |= CR_PACKAGE_FROM_XML; } - return call_user_callback_if_package_finished(pkg, cb_data, err); + return queue_package_if_finished(pkg, cb_data, err); } static gboolean @@ -307,7 +318,7 @@ parse_next_section(CR_FILE *target_file, const char *path, cr_ParserData *pd, GE } //TODO(amatej): there is quite some overlap with this and cr_load_xml_files, -// we could use this api and just wrap it in cr_loax_xml_files? +// consider using this api to implement cr_load_xml_files? 
int cr_xml_parse_main_metadata_together(const char *primary_path, const char *filelists_path, const char *other_path, @@ -319,140 +330,229 @@ int cr_xml_parse_main_metadata_together(const char *primary_path, void *warningcb_data, GError **err) { - int ret = CRE_OK; - CR_FILE *primary_f = NULL; - CR_FILE *filelists_f = NULL; - CR_FILE *other_f = NULL; - GError *tmp_err = NULL; + assert(pkgcb || newpkgcb); + cr_PkgIterator* pkg_iterator = cr_PkgIterator_new( + primary_path, filelists_path, other_path, newpkgcb, newpkgcb_data, warningcb, warningcb_data, err + ); + + if (*err) { + return (*err)->code; + } + + assert(pkg_iterator); + cr_Package* package = NULL; + GError* tmp_err = NULL; + + while (package = cr_PkgIterator_parse_next(pkg_iterator, err)) { + if (pkgcb) { + // call user package callback + // pkgcb() destroys the package!! + if (pkgcb(package, pkgcb_data, &tmp_err)) { + // Error condition + if (tmp_err) { + g_propagate_prefixed_error(err, tmp_err, "Parsing interrupted: "); + } else { + g_set_error(err, ERR_DOMAIN, CRE_CBINTERRUPTED, "Parsing interrupted"); + } + cr_PkgIterator_free(pkg_iterator, err); + return CRE_CBINTERRUPTED; + } else { + // If callback return CRE_OK but it simultaneously set + // the tmp_err then it's a programming error. + assert(tmp_err == NULL); + } + } else { + // Free the package if there is no external callback to do so and we have no newpkgcb + if (!newpkgcb) { + cr_package_free(package); + } + } + } - cr_CbData cbdata; - cbdata.in_progress_pkgs_list = NULL; - cbdata.newpkgcb = newpkgcb; - cbdata.newpkgcb_data = newpkgcb_data; - cbdata.pkgcb = pkgcb; - cbdata.pkgcb_data = pkgcb_data; + cr_PkgIterator_free(pkg_iterator, err); + if (*err) { + return (*err)->code; + } else { + return CRE_OK; + } +} + +// TODO: maybe whether or not individual files are parsed could be controlled by NULL paths? I think cr_load_xml_files +// already works that way. 
+cr_PkgIterator * +cr_PkgIterator_new(const char *primary_path, + const char *filelists_path, + const char *other_path, + cr_XmlParserNewPkgCb newpkgcb, + void *newpkgcb_data, + cr_XmlParserWarningCb warningcb, + void *warningcb_data, + GError **err) +{ assert(primary_path); assert(filelists_path); assert(other_path); - assert(newpkgcb || pkgcb); assert(!err || *err == NULL); - cr_ParserData *primary_pd = NULL; - cr_ParserData *filelists_pd = NULL; - cr_ParserData *other_pd = NULL; + cr_PkgIterator* new_iter = g_new0(cr_PkgIterator, 1); + if (!new_iter) { + return NULL; + } + + new_iter->primary_path = primary_path; + new_iter->filelists_path = filelists_path; + new_iter->other_path = other_path; + + new_iter->primary_f = NULL; + new_iter->filelists_f = NULL; + new_iter->other_f = NULL; + + new_iter->primary_pd = NULL; + new_iter->filelists_pd = NULL; + new_iter->other_pd = NULL; + + new_iter->primary_is_done = 0; + new_iter->filelists_is_done = 0; + new_iter->other_is_done = 0; + + cr_CbData *cbdata = g_new0(cr_CbData, 1); + new_iter->cbdata = cbdata; + cbdata->in_progress_pkgs_list = NULL; + cbdata->finished_pkgs_queue = g_queue_new(); + + cbdata->in_progress_count_primary = 0; + cbdata->in_progress_count_filelists = 0; + cbdata->in_progress_count_other = 0; - primary_f = cr_open(primary_path, CR_CW_MODE_READ, CR_CW_AUTO_DETECT_COMPRESSION, &tmp_err); + cbdata->newpkgcb = newpkgcb; + cbdata->newpkgcb_data = newpkgcb_data; + + new_iter->tmp_err = NULL; + + GError* tmp_err = new_iter->tmp_err; + new_iter->primary_f = cr_open(primary_path, CR_CW_MODE_READ, CR_CW_AUTO_DETECT_COMPRESSION, &tmp_err); if (tmp_err) { - ret = tmp_err->code; g_propagate_prefixed_error(err, tmp_err, "Cannot open %s: ", primary_path); - g_clear_error(&tmp_err); - goto out; + cr_PkgIterator_free(new_iter, err); + return NULL; } - filelists_f = cr_open(filelists_path, CR_CW_MODE_READ, CR_CW_AUTO_DETECT_COMPRESSION, &tmp_err); + new_iter->filelists_f = cr_open(filelists_path, CR_CW_MODE_READ, 
CR_CW_AUTO_DETECT_COMPRESSION, &tmp_err); if (tmp_err) { - ret = tmp_err->code; g_propagate_prefixed_error(err, tmp_err, "Cannot open %s: ", filelists_path); - g_clear_error(&tmp_err); - goto out; + cr_PkgIterator_free(new_iter, err); + return NULL; } - other_f = cr_open(other_path, CR_CW_MODE_READ, CR_CW_AUTO_DETECT_COMPRESSION, &tmp_err); + new_iter->other_f = cr_open(other_path, CR_CW_MODE_READ, CR_CW_AUTO_DETECT_COMPRESSION, &tmp_err); if (tmp_err) { - ret = tmp_err->code; g_propagate_prefixed_error(err, tmp_err, "Cannot open %s: ", other_path); - g_clear_error(&tmp_err); - goto out; + cr_PkgIterator_free(new_iter, err); + return NULL; } //TODO(amatej): In the future we could make filelists/other optional if there is a need for it. That would mean we // should replace the last 0 in primary_parser_data_new depending on whether we have filelists or not. - primary_pd = primary_parser_data_new(newpkgcb_primary, &cbdata, pkgcb_primary, &cbdata, warningcb, warningcb_data, 0); - filelists_pd = filelists_parser_data_new(newpkgcb_filelists, &cbdata, pkgcb_filelists, &cbdata, warningcb, warningcb_data); - other_pd = other_parser_data_new(newpkgcb_other, &cbdata, pkgcb_other, &cbdata, warningcb, warningcb_data); - - gboolean primary_is_done = 0; - gboolean filelists_is_done = 0; - gboolean other_is_done = 0; - cbdata.in_progress_count_primary = 0; - cbdata.in_progress_count_filelists = 0; - cbdata.in_progress_count_other = 0; - while (!primary_is_done || !filelists_is_done || !other_is_done) { - while ((cbdata.in_progress_count_primary <= cbdata.in_progress_count_filelists || - cbdata.in_progress_count_primary <= cbdata.in_progress_count_other) && - !primary_is_done) + new_iter->primary_pd = primary_parser_data_new(newpkgcb_primary, cbdata, pkgcb_primary, cbdata, warningcb, warningcb_data, 0); + new_iter->filelists_pd = filelists_parser_data_new(newpkgcb_filelists, cbdata, pkgcb_filelists, cbdata, warningcb, warningcb_data); + new_iter->other_pd = 
other_parser_data_new(newpkgcb_other, cbdata, pkgcb_other, cbdata, warningcb, warningcb_data); + return new_iter; +} + +cr_Package * +cr_PkgIterator_parse_next(cr_PkgIterator *iter, GError **err) { + cr_CbData *cbdata = (cr_CbData*) iter->cbdata; + + while (!cr_PkgIterator_is_finished(iter) && g_queue_is_empty(cbdata->finished_pkgs_queue)) { + while ((cbdata->in_progress_count_primary <= cbdata->in_progress_count_filelists || + cbdata->in_progress_count_primary <= cbdata->in_progress_count_other) && + !iter->primary_is_done) { - primary_is_done = parse_next_section(primary_f, primary_path, primary_pd, err); + iter->primary_is_done = parse_next_section(iter->primary_f, iter->primary_path, iter->primary_pd, err); if (*err) { - ret = (*err)->code; - goto out; + return NULL; } - } - while ((cbdata.in_progress_count_filelists <= cbdata.in_progress_count_primary || - cbdata.in_progress_count_filelists <= cbdata.in_progress_count_other) && - !filelists_is_done) + while ((cbdata->in_progress_count_filelists <= cbdata->in_progress_count_primary || + cbdata->in_progress_count_filelists <= cbdata->in_progress_count_other) && + !iter->filelists_is_done) { - filelists_is_done = parse_next_section(filelists_f, filelists_path, filelists_pd, err); + iter->filelists_is_done = parse_next_section(iter->filelists_f, iter->filelists_path, iter->filelists_pd, err); if (*err) { - ret = (*err)->code; - goto out; + return NULL; } } - while ((cbdata.in_progress_count_other <= cbdata.in_progress_count_filelists || - cbdata.in_progress_count_other <= cbdata.in_progress_count_primary) && - !other_is_done) + while ((cbdata->in_progress_count_other <= cbdata->in_progress_count_filelists || + cbdata->in_progress_count_other <= cbdata->in_progress_count_primary) && + !iter->other_is_done) { - other_is_done = parse_next_section(other_f, other_path, other_pd, err); + iter->other_is_done = parse_next_section(iter->other_f, iter->other_path, iter->other_pd, err); if (*err) { - ret = (*err)->code; - 
goto out; + return NULL; } } } -out: - if (ret != CRE_OK) { + return g_queue_pop_head(cbdata->finished_pkgs_queue); +} + +gboolean cr_PkgIterator_is_finished(cr_PkgIterator *iter) { + return iter->primary_is_done && iter->filelists_is_done && iter->other_is_done; +} + +void cr_PkgIterator_free(cr_PkgIterator *iter, GError **err) { + assert(iter); + cr_CbData *cbdata = (cr_CbData*) iter->cbdata; + + if (iter->tmp_err) { // An error already encountered // just close the file without error checking - cr_close(primary_f, NULL); - cr_close(filelists_f, NULL); - cr_close(other_f, NULL); + cr_close(iter->primary_f, NULL); + cr_close(iter->filelists_f, NULL); + cr_close(iter->other_f, NULL); } else { + GError* tmp_err = iter->tmp_err; // No error encountered yet - cr_close(primary_f, &tmp_err); - if (!tmp_err) - cr_close(filelists_f, &tmp_err); - if (!tmp_err) - cr_close(other_f, &tmp_err); + cr_close(iter->primary_f, &tmp_err); + if (!tmp_err) { + cr_close(iter->filelists_f, &tmp_err); + } + if (!tmp_err) { + cr_close(iter->other_f, &tmp_err); + } if (tmp_err) { - ret = tmp_err->code; g_propagate_prefixed_error(err, tmp_err, "Error while closing: "); } } - // When interrupted at the right time primary_pd->pkg can either be: - // - referenced in both primary_pd->pkg and cbdata.in_progress_pkgs_list or - // - referenced only in primary_pd->pkg (we have started parsing the pkg but didn't finish it yet) - // in order to avoid a crash remove it from the list if present - // primary_pd->pkg is a special case because of newpkgcb_primary - if (cbdata.in_progress_pkgs_list) { - cbdata.in_progress_pkgs_list = g_slist_remove(cbdata.in_progress_pkgs_list, primary_pd->pkg); - } - if (primary_pd) { + if (iter->primary_pd) { + // When interrupted at the right time primary_pd->pkg can either be: + // - referenced in both primary_pd->pkg and cbdata.in_progress_pkgs_list or + // - referenced only in primary_pd->pkg (we have started parsing the pkg but didn't finish it yet) + // in order to 
avoid a crash remove it from the list if present + // primary_pd->pkg is a special case because of newpkgcb_primary + cr_ParserData* primary_pd = iter->primary_pd; + if (cbdata->in_progress_pkgs_list) { + cbdata->in_progress_pkgs_list = g_slist_remove(cbdata->in_progress_pkgs_list, primary_pd->pkg); + } + cr_package_free(primary_pd->pkg); } - if (cbdata.newpkgcb) { - g_slist_free(cbdata.in_progress_pkgs_list); + cr_xml_parser_data_free(iter->primary_pd); + cr_xml_parser_data_free(iter->filelists_pd); + cr_xml_parser_data_free(iter->other_pd); + + + if (cbdata->newpkgcb) { + g_slist_free(cbdata->in_progress_pkgs_list); + g_queue_free(cbdata->finished_pkgs_queue); } else { - cr_slist_free_full(cbdata.in_progress_pkgs_list, (GDestroyNotify) cr_package_free); + g_slist_free_full(cbdata->in_progress_pkgs_list, (GDestroyNotify) cr_package_free); + g_queue_free_full(cbdata->finished_pkgs_queue, (GDestroyNotify) cr_package_free); } - cr_xml_parser_data_free(primary_pd); - cr_xml_parser_data_free(filelists_pd); - cr_xml_parser_data_free(other_pd); - - return ret; + g_free(cbdata); + g_free(iter); } diff --git a/tests/fixtures.h b/tests/fixtures.h index 676518c..39e48c0 100644 --- a/tests/fixtures.h +++ b/tests/fixtures.h @@ -87,6 +87,9 @@ #define TEST_MRF_UE_OTH_02 TEST_MODIFIED_REPO_FILES_PATH"unknown_element_02-other.xml" #define TEST_LONG_PRIMARY TEST_MODIFIED_REPO_FILES_PATH"long_primary.xml" #define TEST_DIFF_ORDER_FILELISTS TEST_MODIFIED_REPO_FILES_PATH"repo_02_different_order_filelists.xml" +#define TEST_PRIMARY_MULTI_WARN_00 TEST_MODIFIED_REPO_FILES_PATH"multiple_warnings_00-primary.xml" +#define TEST_FILELISTS_MULTI_WARN_00 TEST_MODIFIED_REPO_FILES_PATH"multiple_warnings_00-filelists.xml" +#define TEST_OTHER_MULTI_WARN_00 TEST_MODIFIED_REPO_FILES_PATH"multiple_warnings_00-other.xml" // Test files diff --git a/tests/python/tests/fixtures.py b/tests/python/tests/fixtures.py index 79c0172..71d19b0 100644 --- a/tests/python/tests/fixtures.py +++ 
b/tests/python/tests/fixtures.py @@ -90,6 +90,11 @@ REPO_02_OTHXML = os.path.join(REPO_02_PATH, "repodata/", REPO_WITH_ADDITIONAL_METADATA = os.path.join(REPOS_PATH, "repo_with_additional_metadata") +REPO_WITH_DUPLICATE_PACKAGES = os.path.join(REPOS_PATH, "repo_with_duplicate_packages") +PRIMARY_DUPLICATE_PACKAGES_PATH = os.path.join(REPO_WITH_DUPLICATE_PACKAGES, "repodata/primary.xml.gz") +FILELISTS_DUPLICATE_PACKAGES_PATH = os.path.join(REPO_WITH_DUPLICATE_PACKAGES, "repodata/filelists.xml.gz") +OTHER_DUPLICATE_PACKAGES_PATH = os.path.join(REPO_WITH_DUPLICATE_PACKAGES, "repodata/other.xml.gz") + # Test files FILE_BINARY = "binary_file" diff --git a/tests/python/tests/test_xml_parser.py b/tests/python/tests/test_xml_parser.py index 1adadc7..60eacc5 100644 --- a/tests/python/tests/test_xml_parser.py +++ b/tests/python/tests/test_xml_parser.py @@ -1,8 +1,4 @@ -import re import unittest -import shutil -import tempfile -import os.path import createrepo_c as cr from .fixtures import * @@ -1170,3 +1166,210 @@ class TestCaseXmlParserMainMetadataTogether(unittest.TestCase): FILELISTS_MULTI_WARN_00_PATH, OTHER_MULTI_WARN_00_PATH, newpkgcb, None, warningcb) + +class TestCaseXmlParserPkgIterator(unittest.TestCase): + def test_xml_parser_pkg_iterator_repo01(self): + warnings = [] + + def warningcb(warn_type, msg): + warnings.append((warn_type, msg)) + + package_iterator = cr.PackageIterator( + primary_path=REPO_01_PRIXML, filelists_path=REPO_01_FILXML, other_path=REPO_01_OTHXML, + warningcb=warningcb, + ) + + pkg = next(package_iterator) + + self.assertListEqual(warnings, []) + self.assertEqual(pkg.pkgId, "152824bff2aa6d54f429d43e87a3ff3a0286505c6d93ec87692b5e3a9e3b97bf") + self.assertEqual(pkg.name, "super_kernel") + self.assertEqual(pkg.arch, "x86_64") + self.assertEqual(pkg.version, "6.0.1") + self.assertEqual(pkg.epoch, "0") + self.assertEqual(pkg.release, "2") + self.assertEqual(pkg.summary, "Test package") + self.assertEqual(pkg.description, "This package has provides, 
requires, obsoletes, conflicts options.") + self.assertEqual(pkg.url, "http://so_super_kernel.com/it_is_awesome/yep_it_really_is") + self.assertEqual(pkg.time_file, 1334667003) + self.assertEqual(pkg.time_build, 1334667003) + self.assertEqual(pkg.rpm_license, "LGPLv2") + self.assertEqual(pkg.rpm_vendor, None) + self.assertEqual(pkg.rpm_group, "Applications/System") + self.assertEqual(pkg.rpm_buildhost, "localhost.localdomain") + self.assertEqual(pkg.rpm_sourcerpm, "super_kernel-6.0.1-2.src.rpm") + self.assertEqual(pkg.rpm_header_start, 280) + self.assertEqual(pkg.rpm_header_end, 2637) + self.assertEqual(pkg.rpm_packager, None) + self.assertEqual(pkg.size_package, 2845) + self.assertEqual(pkg.size_installed, 0) + self.assertEqual(pkg.size_archive, 404) + self.assertEqual(pkg.location_href, "super_kernel-6.0.1-2.x86_64.rpm") + self.assertEqual(pkg.location_base, None) + self.assertEqual(pkg.checksum_type, "sha256") + self.assertEqual(pkg.requires, + [('bzip2', 'GE', '0', '1.0.0', None, True), + ('expat', None, None, None, None, True), + ('glib', 'GE', '0', '2.26.0', None, False), + ('zlib', None, None, None, None, False)]) + self.assertEqual(pkg.provides, + [('not_so_super_kernel', 'LT', '0', '5.8.0', None, False), + ('super_kernel', 'EQ', '0', '6.0.0', None, False), + ('super_kernel', 'EQ', '0', '6.0.1', '2', False), + ('super_kernel(x86-64)', 'EQ', '0', '6.0.1', '2', False)]) + self.assertEqual(pkg.conflicts, + [('kernel', None, None, None, None, False), + ('super_kernel', 'EQ', '0', '5.0.0', None, False), + ('super_kernel', 'LT', '0', '4.0.0', None, False)]) + self.assertEqual(pkg.obsoletes, + [('kernel', None, None, None, None, False), + ('super_kernel', 'EQ', '0', '5.9.0', None, False)]) + self.assertEqual(pkg.files, + [(None, '/usr/bin/', 'super_kernel'), + (None, '/usr/share/man/', 'super_kernel.8.gz')]) + self.assertEqual(pkg.changelogs, + [('Tomas Mlcoch <tmlcoch@redhat.com> - 6.0.1-1', + 1334664000, + '- First release'), + ('Tomas Mlcoch 
<tmlcoch@redhat.com> - 6.0.1-2', + 1334664001, + '- Second release')]) + + self.assertRaises(StopIteration, next, package_iterator) + self.assertTrue(package_iterator.is_finished()) + + def test_xml_parser_pkg_iterator_repo02(self): + warnings = [] + def warningcb(warn_type, msg): + warnings.append((warn_type, msg)) + + package_iterator = cr.PackageIterator( + primary_path=REPO_02_PRIXML, filelists_path=REPO_02_FILXML, other_path=REPO_02_OTHXML, + warningcb=warningcb, + ) + packages = list(package_iterator) + + self.assertEqual(len(packages), 2) + self.assertEqual(packages[0].name, "fake_bash") + self.assertListEqual(warnings, []) + self.assertRaises(StopIteration, next, package_iterator) + self.assertTrue(package_iterator.is_finished()) + + def test_xml_parser_pkg_iterator_repo02_newpkgcb_as_filter(self): + def newpkgcb(pkgId, name, arch): + if name in {"fake_bash"}: + return cr.Package() + + package_iterator = cr.PackageIterator( + primary_path=REPO_02_PRIXML, filelists_path=REPO_02_FILXML, other_path=REPO_02_OTHXML, + newpkgcb=newpkgcb, + ) + + packages = list(package_iterator) + + self.assertEqual(len(packages), 1) + self.assertEqual(packages[0].name, "fake_bash") + self.assertRaises(StopIteration, next, package_iterator) + self.assertTrue(package_iterator.is_finished()) + + def test_xml_parser_pkg_iterator_warnings(self): + warnings = [] + def warningcb(warn_type, msg): + warnings.append((warn_type, msg)) + + package_iterator = cr.PackageIterator( + primary_path=PRIMARY_MULTI_WARN_00_PATH, filelists_path=FILELISTS_MULTI_WARN_00_PATH, other_path=OTHER_MULTI_WARN_00_PATH, + warningcb=warningcb, + ) + + self.assertEqual(next(package_iterator).name, 'fake_bash') + self.assertFalse(package_iterator.is_finished()) + self.assertEqual(next(package_iterator).name, 'super_kernel') + + self.assertRaises(StopIteration, next, package_iterator) + self.assertTrue(package_iterator.is_finished()) + + self.assertEqual(warnings, + [(0, 'Unknown element "fooelement"'), + (1, 
'Missing attribute "type" of a package element'), + (0, 'Unknown element "foo"'), + (3, 'Conversion of "foobar" to integer failed'), + (0, 'Unknown element "bar"'), + (1, 'Missing attribute "arch" of a package element'), + (2, 'Unknown file type "xxx"'), + (0, 'Unknown element "bar"'), + (1, 'Missing attribute "name" of a package element'), + (0, 'Unknown element "bar"'), + (3, 'Conversion of "xxx" to integer failed')]) + + + def test_xml_parser_package_iterator_error(self): + + package_iterator = cr.PackageIterator( + primary_path=PRIMARY_ERROR_00_PATH, filelists_path=FILELISTS_ERROR_00_PATH, other_path=OTHER_ERROR_00_PATH, + ) + + with self.assertRaises(cr.CreaterepoCError) as ctx: + packages = list(package_iterator) + + + def test_xml_parser_pkg_iterator_newpkgcb_abort(self): + def newpkgcb(pkgId, name, arch): + raise Error("Foo error") + + package_iterator = cr.PackageIterator( + primary_path=REPO_02_PRIXML, filelists_path=REPO_02_FILXML, other_path=REPO_02_OTHXML, + newpkgcb=newpkgcb, + ) + + with self.assertRaises(cr.CreaterepoCError) as ctx: + packages = list(package_iterator) + + + def test_xml_parser_pkg_iterator_warningcb_abort(self): + def warningcb(type, msg): + raise Error("Foo error") + + package_iterator = cr.PackageIterator( + primary_path=PRIMARY_MULTI_WARN_00_PATH, filelists_path=FILELISTS_MULTI_WARN_00_PATH, other_path=OTHER_MULTI_WARN_00_PATH, + warningcb=warningcb, + ) + + with self.assertRaises(cr.CreaterepoCError) as ctx: + packages = list(package_iterator) + + + def test_xml_parser_pkg_iterator_duplicate_pkgs(self): + # This shouldn't really be allowed, the same NEVRA (or) pkgid shouldn't be present twice in a repo. + # but it is unfortunately common so parsing the repos should be possible. 
+ package_iterator = cr.PackageIterator( + primary_path=PRIMARY_DUPLICATE_PACKAGES_PATH, + filelists_path=FILELISTS_DUPLICATE_PACKAGES_PATH, + other_path=OTHER_DUPLICATE_PACKAGES_PATH, + ) + + packages = list(package_iterator) + + self.assertEqual(len(packages), 4) + self.assertEqual(packages[0].nevra(), "fake_bash-0:1.1.1-1.x86_64") + self.assertEqual(packages[1].nevra(), "fake_bash-0:1.1.1-1.x86_64") + self.assertEqual(packages[2].nevra(), "super_kernel-0:6.0.1-2.x86_64") + self.assertEqual(packages[3].nevra(), "super_kernel-0:6.0.1-2.x86_64") + + self.assertEqual(packages[0].pkgId, "90f61e546938a11449b710160ad294618a5bd3062e46f8cf851fd0088af184b7") + self.assertEqual(packages[1].pkgId, "90f61e546938a11449b710160ad294618a5bd3062e46f8cf851fd0088af184b7") + self.assertEqual(packages[2].pkgId, "6d43a638af70ef899933b1fd86a866f18f65b0e0e17dcbf2e42bfd0cdd7c63c3") + self.assertEqual(packages[3].pkgId, "6d43a638af70ef899933b1fd86a866f18f65b0e0e17dcbf2e42bfd0cdd7c63c3") + + self.assertEqual(len(packages[0].files), 1) + self.assertEqual(len(packages[1].files), 1) + self.assertEqual(len(packages[2].files), 2) + self.assertEqual(len(packages[3].files), 2) + + self.assertEqual(len(packages[0].changelogs), 1) + self.assertEqual(len(packages[1].changelogs), 1) + self.assertEqual(len(packages[2].changelogs), 2) + self.assertEqual(len(packages[3].changelogs), 2) + + self.assertRaises(StopIteration, next, package_iterator) + self.assertTrue(package_iterator.is_finished()) diff --git a/tests/test_xml_parser_main_metadata_together.c b/tests/test_xml_parser_main_metadata_together.c index fc05951..8ce417d 100644 --- a/tests/test_xml_parser_main_metadata_together.c +++ b/tests/test_xml_parser_main_metadata_together.c @@ -51,6 +51,33 @@ pkgcb_interrupt(cr_Package *pkg, void *cbdata, GError **err) } static int +newpkgcb(cr_Package **pkg, + G_GNUC_UNUSED const char *pkgId, + const char *name, + G_GNUC_UNUSED const char *arch, + G_GNUC_UNUSED void *cbdata, + GError **err) +{ + g_assert(pkg 
!= NULL); + g_assert(*pkg == NULL); + g_assert(pkgId != NULL); + g_assert(!err || *err == NULL); + + *pkg = cr_package_new(); + cr_Package **list; + + if (cbdata) { + cr_Package **pkgl = cbdata; + for (; *pkgl; pkgl++) { + continue; + } + *pkgl = *pkg; + } + + return CR_CB_RET_OK; +} + +static int newpkgcb_skip_fake_bash(cr_Package **pkg, G_GNUC_UNUSED const char *pkgId, const char *name, @@ -156,6 +183,7 @@ test_cr_xml_parse_main_metadata_together_02_invalid_path(void) NULL, NULL, pkgcb, &parsed, NULL, NULL, &tmp_err); g_assert(tmp_err != NULL); g_assert_cmpint(ret, ==, CRE_NOFILE); + g_error_free(tmp_err); } static void @@ -242,6 +270,27 @@ test_cr_xml_parse_main_metadata_together_07_warningcb_interrupt(void) } static void +test_cr_xml_parse_main_metadata_together_071_multiple_warningcb(void) +{ + char *warnmsgs; + GString *warn_strings = g_string_new(0); + cr_Package *list_of_pkgs[2] = {NULL, NULL}; + GError *tmp_err = NULL; + int ret = cr_xml_parse_main_metadata_together(TEST_PRIMARY_MULTI_WARN_00, TEST_FILELISTS_MULTI_WARN_00, TEST_OTHER_MULTI_WARN_00, + newpkgcb, &list_of_pkgs, NULL, NULL, warningcb, warn_strings, &tmp_err); + g_assert(tmp_err == NULL); + g_assert(list_of_pkgs[0] != NULL); + g_assert(list_of_pkgs[1] != NULL); + cr_package_free(list_of_pkgs[0]); + cr_package_free(list_of_pkgs[1]); + g_assert_cmpint(ret, ==, CRE_OK); + warnmsgs = g_string_free(warn_strings, FALSE); + g_assert_cmpstr(warnmsgs, ==, "Unknown element \"fooelement\";Missing attribute \"type\" of a package element;Unknown element \"foo\";Conversion of \"foobar\" to integer failed;Unknown element \"bar\";Missing attribute \"arch\" of a package element;Unknown file type \"xxx\";Unknown element \"bar\";Missing attribute \"name\" of a package element;Unknown element \"bar\";Conversion of \"xxx\" to integer failed;"); + g_free(warnmsgs); +} + + +static void test_cr_xml_parse_main_metadata_together_08_long_primary(void) { int parsed = 0; @@ -253,6 +302,76 @@ 
test_cr_xml_parse_main_metadata_together_08_long_primary(void) g_assert_cmpint(parsed, ==, 2); } +static void +test_cr_xml_package_iterator_00(void) +{ + int parsed = 0; + GError *tmp_err = NULL; + cr_Package *package = NULL; + + cr_PkgIterator *pkg_iterator = cr_PkgIterator_new( + TEST_REPO_02_PRIMARY, TEST_REPO_02_FILELISTS, TEST_REPO_02_OTHER, NULL, NULL, NULL, NULL, &tmp_err); + + while ((package = cr_PkgIterator_parse_next(pkg_iterator, &tmp_err))) { + parsed++; + cr_package_free(package); + } + + g_assert(cr_PkgIterator_is_finished(pkg_iterator)); + cr_PkgIterator_free(pkg_iterator, &tmp_err); + + g_assert(tmp_err == NULL); + g_assert_cmpint(parsed, ==, 2); +} + + +static void +test_cr_xml_package_iterator_01_warningcb_interrupt(void) +{ + int parsed = 0; + int numofwarnings = 0; + GError *tmp_err = NULL; + cr_Package *package = NULL; + + cr_PkgIterator *pkg_iterator = cr_PkgIterator_new( + TEST_REPO_02_PRIMARY, TEST_MRF_BAD_TYPE_FIL, TEST_REPO_02_OTHER, NULL, NULL, warningcb_interrupt, &numofwarnings, &tmp_err); + + while ((package = cr_PkgIterator_parse_next(pkg_iterator, &tmp_err))) { + parsed++; + cr_package_free(package); + } + + cr_PkgIterator_free(pkg_iterator, &tmp_err); + + g_assert(tmp_err != NULL); + g_assert_cmpint(parsed, ==, 0); + g_assert_cmpint(tmp_err->code, ==, CRE_CBINTERRUPTED); + g_assert_cmpint(numofwarnings, ==, 1); + g_clear_error(&tmp_err); +} + +static void +test_cr_xml_package_iterator_02_long_primary(void) +{ + int parsed = 0; + GError *tmp_err = NULL; + cr_Package *package = NULL; + + cr_PkgIterator *pkg_iterator = cr_PkgIterator_new( + TEST_LONG_PRIMARY, TEST_REPO_02_FILELISTS, TEST_REPO_02_OTHER, NULL, NULL, NULL, NULL, &tmp_err); + + while ((package = cr_PkgIterator_parse_next(pkg_iterator, &tmp_err))) { + parsed++; + cr_package_free(package); + } + + g_assert(cr_PkgIterator_is_finished(pkg_iterator)); + cr_PkgIterator_free(pkg_iterator, &tmp_err); + + g_assert(tmp_err == NULL); + g_assert_cmpint(parsed, ==, 2); +} + int 
main(int argc, char *argv[]) { @@ -285,5 +404,17 @@ main(int argc, char *argv[]) g_test_add_func("/xml_parser_main_metadata/test_cr_xml_parse_main_metadata_together_08_long_primary", test_cr_xml_parse_main_metadata_together_08_long_primary); + g_test_add_func("/xml_parser_main_metadata/test_cr_xml_parse_main_metadata_together_071_multiple_warningcb", + test_cr_xml_parse_main_metadata_together_071_multiple_warningcb); + + g_test_add_func("/xml_parser_main_metadata/test_cr_xml_package_iterator_00", + test_cr_xml_package_iterator_00); + + g_test_add_func("/xml_parser_main_metadata/test_cr_xml_package_iterator_01_warningcb_interrupt", + test_cr_xml_package_iterator_01_warningcb_interrupt); + + g_test_add_func("/xml_parser_main_metadata/test_cr_xml_package_iterator_02_long_primary", + test_cr_xml_package_iterator_02_long_primary); + return g_test_run(); } diff --git a/tests/testdata/repo_with_duplicate_packages/repodata/filelists.xml.gz b/tests/testdata/repo_with_duplicate_packages/repodata/filelists.xml.gz Binary files differnew file mode 100644 index 0000000..80698ac --- /dev/null +++ b/tests/testdata/repo_with_duplicate_packages/repodata/filelists.xml.gz diff --git a/tests/testdata/repo_with_duplicate_packages/repodata/other.xml.gz b/tests/testdata/repo_with_duplicate_packages/repodata/other.xml.gz Binary files differnew file mode 100644 index 0000000..105e3ff --- /dev/null +++ b/tests/testdata/repo_with_duplicate_packages/repodata/other.xml.gz diff --git a/tests/testdata/repo_with_duplicate_packages/repodata/primary.xml.gz b/tests/testdata/repo_with_duplicate_packages/repodata/primary.xml.gz Binary files differnew file mode 100644 index 0000000..9642d3e --- /dev/null +++ b/tests/testdata/repo_with_duplicate_packages/repodata/primary.xml.gz diff --git a/tests/testdata/repo_with_duplicate_packages/repodata/repomd.xml b/tests/testdata/repo_with_duplicate_packages/repodata/repomd.xml new file mode 100644 index 0000000..2ec6751 --- /dev/null +++ 
b/tests/testdata/repo_with_duplicate_packages/repodata/repomd.xml @@ -0,0 +1,28 @@ +<?xml version="1.0" encoding="UTF-8"?> +<repomd xmlns="http://linux.duke.edu/metadata/repo" xmlns:rpm="http://linux.duke.edu/metadata/rpm"> + <revision>1615686706</revision> + <data type="primary"> + <checksum type="sha256">b048c7149921446d912be7635413ef00f6702b70942abd22b1a205b22bce177b</checksum> + <open-checksum type="sha256">39c6c1965f88979f96735d57e1ca6867915974fd96953f7f37c8702de26aae8f</open-checksum> + <location href="repodata/primary.xml.gz"/> + <timestamp>1646026966</timestamp> + <size>1017</size> + <open-size>6393</open-size> + </data> + <data type="filelists"> + <checksum type="sha256">c8dcf8ac18756f9395ab01acd7a0677fea037728c54ca430b8ce5d7148033fdd</checksum> + <open-checksum type="sha256">870cecda9e02bfcf9e1cc1d9e5980463e5ba33f9f82feadb9af4a051f17b281d</open-checksum> + <location href="repodata/filelists.xml.gz"/> + <timestamp>1646026966</timestamp> + <size>341</size> + <open-size>1041</open-size> + </data> + <data type="other"> + <checksum type="sha256">f277248a44ecccc14c9cbe782c70af632dcdedb2befcc79a2db93442431cffe5</checksum> + <open-checksum type="sha256">42cb8bd965c3cf1ad48a67cba44804177e1beb76c4fcbd1d0c94527e4d743a88</open-checksum> + <location href="repodata/other.xml.gz"/> + <timestamp>1646026966</timestamp> + <size>405</size> + <open-size>1509</open-size> + </data> +</repomd> |