diff options
Diffstat (limited to 'examples/python/repodata_parsing.py')
-rwxr-xr-x | examples/python/repodata_parsing.py | 108 |
1 files changed, 56 insertions, 52 deletions
diff --git a/examples/python/repodata_parsing.py b/examples/python/repodata_parsing.py index 518962a..e07d7ae 100755 --- a/examples/python/repodata_parsing.py +++ b/examples/python/repodata_parsing.py @@ -77,18 +77,63 @@ def print_package_info(pkg): print("Changelogs:") print_changelogs(pkg.changelogs) -def first_method(): - """Use of this method is discouraged.""" + +def streaming_iterator(): + """Parsing main metadata types (primary, filelists, other) at the same time. + This approach significantly reduces memory footprint because we don't need + to keep all the packages in memory, user can handle them one by one. + + This is the most flexible method, and the recommended one if you need all of the + RPM metadata. If you only need to parse one file it might not be the most efficient. + """ + + def warningcb(warning_type, message): + print("PARSER WARNING: %s" % message) + return True + + repomd = cr.Repomd() + cr.xml_parse_repomd(os.path.join(REPO_PATH, "repodata/repomd.xml"), repomd, warningcb) + + primary_xml_path = None + filelists_xml_path = None + other_xml_path = None + for record in repomd.records: + if record.type == "primary": + primary_xml_path = os.path.join(REPO_PATH, record.location_href) + elif record.type == "filelists": + filelists_xml_path = os.path.join(REPO_PATH, record.location_href) + elif record.type == "other": + other_xml_path = os.path.join(REPO_PATH, record.location_href) + + # + # Main XML metadata parsing (primary, filelists, other) + # + package_iterator = cr.PackageIterator(primary_path=primary_xml_path, + filelists_path=filelists_xml_path, + other_path=other_xml_path, + warningcb=warningcb) + + for pkg in package_iterator: + # Called when whole package entry from all 3 metadata xml files is parsed + print_package_info(pkg) + + +def oneshot(): + """Parse all repo metadata for a given repo path. + + Use of this method is discouraged. + """ md = cr.Metadata() md.locate_and_load_xml(REPO_PATH) for key in md.keys(): pkg = md.get(key) print_package_info(pkg) -def second_method(): - """Preferred method for repodata parsing. - Important callbacks for repodata parsing: +def oneshot_callback(): + """Parse one file at a time into a set of packages. + + Use of this method is discouraged. newpkgcb -------- @@ -211,58 +256,17 @@ def second_method(): for pkg in packages.values(): print_package_info(pkg) -def third_method(): - """Parsing main metadata types (primary, filelists, other) at the same time. - This approach significantly reduces memory footprint because we don't need - to keep all the packages in memory, user can handle them one by one. - - The API reflects xml_parse_primary/filelists/other except that it handles - all of them at the same time. - - """ - def warningcb(warning_type, message): - print("PARSER WARNING: %s" % message) - return True - - repomd = cr.Repomd() - cr.xml_parse_repomd(os.path.join(REPO_PATH, "repodata/repomd.xml"), repomd, warningcb) - - primary_xml_path = None - filelists_xml_path = None - other_xml_path = None - for record in repomd.records: - if record.type == "primary": - primary_xml_path = os.path.join(REPO_PATH, record.location_href) - elif record.type == "filelists": - filelists_xml_path = os.path.join(REPO_PATH, record.location_href) - elif record.type == "other": - other_xml_path = os.path.join(REPO_PATH, record.location_href) - - # - # Main XML metadata parsing (primary, filelists, other) - # - - def pkgcb(pkg): - # Called when whole package entry from all 3 metadata xml files is parsed - print_package_info(pkg) - - cr.xml_parse_main_metadata_together(primary_xml_path, - filelists_xml_path, - other_xml_path, - None, - pkgcb, - warningcb) if __name__ == "__main__": - print('"All in one shot" method:') - first_method() + print("Streaming iterator based method:") + streaming_iterator() print() - print("Callback based method:") - second_method() + print('"All in one shot" method:') + oneshot() print() - print("Streaming callback based method:") - third_method() + print("Callback based method:") + oneshot_callback() |