From 3e0eb64ae31b39a67111c0401e8cda7d0c43a482 Mon Sep 17 00:00:00 2001 From: Jon Turney Date: Mon, 5 Dec 2022 11:41:24 +0000 Subject: Clean up some obsolete tools which are only useful when we have multiple arches --- calm/compare-arches | 129 ----------------------------- calm/dedupsrc.py | 214 ------------------------------------------------ calm/find-duplicates.py | 176 --------------------------------------- 3 files changed, 519 deletions(-) delete mode 100755 calm/compare-arches delete mode 100644 calm/dedupsrc.py delete mode 100644 calm/find-duplicates.py diff --git a/calm/compare-arches b/calm/compare-arches deleted file mode 100755 index 4e2dd19..0000000 --- a/calm/compare-arches +++ /dev/null @@ -1,129 +0,0 @@ -#!/usr/bin/env python3 -# -# Copyright (c) 2015 Jon Turney -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in -# all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -# THE SOFTWARE. -# - -# -# compare-arch - compare package sets between architectures -# - -import argparse -import logging -import os -import sys - -import common_constants -import maintainers -import package - - -# -# -# - -def main(args): - packages = {} - pset = {} - union = set() - - for arch in common_constants.ARCHES: - # build package list - packages[arch] = package.read_packages(args.rel_area, arch) - - # make a set of the package list - pset[arch] = frozenset(packages[arch].keys()) - - # make the union of all the package lists - union = union.union(pset[arch]) - - # XXX: could filter out obsolete here - - for p in sorted(union): - exists = {} - obs = {} - - for arch in common_constants.ARCHES: - exists[arch] = False - obs[arch] = True - if p in packages[arch]: - exists[arch] = True - if '_obsolete' in packages[arch][p].hints.get('category', ''): - obs[arch] = True - else: - obs[arch] = False - - if p.startswith('cygwin32') or p.startswith('cygwin64') or p.endswith('-debuginfo'): - continue - - # packages which exist in all arches - if all(exists.values()): - - # report packages which are obsolete only in some arch - if any(obs.values()) and not all(obs.values()): - print("%s is in all arches, but obsolete only in %s" % (p, [a for a in obs if obs[a]])) - - continue - - # ignore packages which are obsolete in all the arches they exist in - if all(obs.values()): - continue - - -# print(p, exists, obs) - print("%s is only in arch %s" % (p, [a for a in exists if exists[a]])) - - # are there any packages which have a maintainer, but don't exist? - mlist = maintainers.read(args, getattr(args, 'orphanmaint', None)) - all_packages = maintainers.all_packages(mlist) - - for p in sorted(all_packages): - if p not in union: - logging.warning("package '%s' has a maintainer, but doesn't exist in any architecture" % (p)) - - # find the set of packages which aren't in an arch -# for arch in common_constants.ARCHES: -# diff = union.difference(pset[arch]) -# print("only in %s" % arch) -# print(sorted(diff)) - - -# -# -# - -if __name__ == "__main__": - homedir_default = common_constants.HOMEDIR - pkglist_default = common_constants.PKGMAINT - relarea_default = common_constants.FTP - - parser = argparse.ArgumentParser(description='Compare arch package sets') - parser.add_argument('--homedir', action='store', metavar='DIR', help="maintainer home directory (default: " + homedir_default + ")", default=homedir_default) - parser.add_argument('--pkglist', action='store', metavar='FILE', help="package maintainer list (default: " + pkglist_default + ")", default=pkglist_default) - parser.add_argument('--releasearea', action='store', metavar='DIR', help="release directory (default: " + relarea_default + ")", default=relarea_default, dest='rel_area') - parser.add_argument('-v', '--verbose', action='count', dest='verbose', help='verbose output') - (args) = parser.parse_args() - - if args.verbose: - logging.getLogger().setLevel(logging.INFO) - - logging.basicConfig(format=os.path.basename(sys.argv[0]) + ': %(message)s') - - main(args) diff --git a/calm/dedupsrc.py b/calm/dedupsrc.py deleted file mode 100644 index 2c71d52..0000000 --- a/calm/dedupsrc.py +++ /dev/null @@ -1,214 +0,0 @@ -#!/usr/bin/env python3 -# -# Copyright (c) 2017 Jon Turney -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in -# all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -# THE SOFTWARE. -# - -# -# Move a given source archive to src/ (assuming it is indentical in x86/ and -# x86_64/) and adjust hints appropriately. -# (XXX: could probably be extended to move to noarch/ if not source, as well) -# - -import argparse -import copy -import os -import re -import sys - -from . import common_constants -from . import hint -from . import utils - -binary_only_hints = ['requires', 'depends', 'obsoletes', 'external-source'] - -# -# -# - - -def hint_file_write(fn, hints): - with open(fn, 'w') as f: - for k, v in hints.items(): - print("%s: %s" % (k, v), file=f) - - -# -# -# - -def invent_sdesc(path, vr): - for (dirpath, _subdirs, files) in os.walk(path): - # debuginfo packages never have a good sdesc - if 'debuginfo' in dirpath: - continue - - # but just pick the sdesc from first sub-package which has one ... - for f in files: - if re.match('^.*-' + re.escape(vr) + '.hint$', f): - hints = hint.hint_file_parse(os.path.join(dirpath, f), hint.pvr) - if 'sdesc' in hints: - sdesc = hints['sdesc'] - - # ... which doesn't contain 'Obsoleted' - if 'Obsoleted' in sdesc: - continue - - # remove anything inside parentheses at the end of quoted - # sdesc - sdesc = re.sub(r'"(.*)"', r'\1', sdesc) - sdesc = re.sub(r'(\(.*?\))$', '', sdesc) - sdesc = sdesc.strip() - sdesc = '"' + sdesc + '"' - - return sdesc - - return None -# -# -# - - -def dedup(archive, relarea): - # split path and filename - (path, filename) = os.path.split(archive) - - # parse tarfile name - match = re.match(r'^(.+?)-(\d.*)-src\.tar' + - common_constants.PACKAGE_COMPRESSIONS_RE + r'$', filename) - - if not match: - print('tarfile name %s does not meet expectations' % (filename)) - sys.exit(1) - - p = match.group(1) - vr = match.group(2) - ext = match.group(3) - - # compute filenames - to_filename = p + '-src-' + vr + '.tar.' + ext - hint_filename = p + '-' + vr + '.hint' - to_hint_filename = p + '-src-' + vr + '.hint' - - # read hints for both arches - hints = {} - for arch in ['x86', 'x86_64']: - hint_pathname = os.path.join(relarea, arch, path, hint_filename) - - if not os.path.exists(hint_pathname): - print('%s not found' % (hint_pathname)) - return 1 - - hints[arch] = hint.hint_file_parse(hint_pathname, hint.pvr) - - # remove hints which only have meaning for binary packages - # - # (requires: tends to have libgcc1 more often on x86, so otherwise this - # would cause spurious differences between hints to be reported) - for h in binary_only_hints: - if h in hints[arch]: - del hints[arch][h] - - if hints['x86'] != hints['x86_64']: - print('hints for %s-%s differ between arches' % (p, vr)) - return 1 - - if ('skip' in hints['x86']) and (len(hints['x86']) == 1): - print('hints for %s-%s is skip: only' % (p, vr)) - hints['x86']['category'] = '' - # if hint only contains skip:, try to come up with a plausible sdesc - sdesc = invent_sdesc(os.path.join(relarea, 'x86', path), vr) - if sdesc: - print('suggested sdesc is %s' % (sdesc)) - hints['x86']['sdesc'] = sdesc - - if 'sdesc' not in hints['x86']: - print('hints for %s-%s has no sdesc:' % (p, vr)) - return 1 - - # ensure target directory exists - utils.makedirs(os.path.join(relarea, 'src', path, p + '-src')) - - # write .hint file for new -src package - src_hints = copy.copy(hints['x86']) - - if 'source' not in src_hints['sdesc']: - sdesc = re.sub(r'"(.*)"', r'\1', src_hints['sdesc']) - sdesc += ' (source code)' - src_hints['sdesc'] = '"' + sdesc + '"' - - if 'Source' not in src_hints['category']: - src_hints['category'] = src_hints['category'] + ' Source' - - if 'parse-warnings' in src_hints: - del src_hints['parse-warnings'] - - to_hint_pathname = os.path.join(relarea, 'src', path, p + '-src', to_hint_filename) - print('writing %s' % (to_hint_pathname)) - hint_file_write(to_hint_pathname, src_hints) - - # move the src files to src/ - for arch in ['x86', 'x86_64']: - print('%s -> %s' % (os.path.join(relarea, arch, path, filename), os.path.join(relarea, 'src', path, p + '-src', to_filename))) - os.rename(os.path.join(relarea, arch, path, filename), os.path.join(relarea, 'src', path, p + '-src', to_filename)) - - # adjust external-source in .hint for all subpackages - for arch in ['x86', 'x86_64']: - for (dirpath, _subdirs, files) in os.walk(os.path.join(relarea, arch, path)): - subpkg = os.path.basename(dirpath) - filename = subpkg + '-' + vr + '.hint' - if filename in files: - hint_pathname = os.path.join(dirpath, filename) - hints = hint.hint_file_parse(hint_pathname, hint.pvr) - if 'parse-warnings' in hints: - del hints['parse-warnings'] - if ('skip' in hints): - # p was source only, so no package remains - print('removing %s' % (hint_pathname)) - os.remove(hint_pathname) - elif ('external-source' not in hints) or (hints['external-source'] == p): - hints['external-source'] = p + '-src' - print('writing %s' % (hint_pathname)) - hint_file_write(hint_pathname, hints) - - return 0 - -# -# -# - - -def main(): - relarea_default = common_constants.FTP - - parser = argparse.ArgumentParser(description='Source package deduplicator') - parser.add_argument('archive', metavar='ARCHIVE', nargs=1, help="source archive to deduplicate") - parser.add_argument('--releasearea', action='store', metavar='DIR', help="release directory (default: " + relarea_default + ")", default=relarea_default, dest='rel_area') - (args) = parser.parse_args() - - return dedup(args.archive[0], args.rel_area) - - -# -# -# - -if __name__ == "__main__": - sys.exit(main()) diff --git a/calm/find-duplicates.py b/calm/find-duplicates.py deleted file mode 100644 index 59d8012..0000000 --- a/calm/find-duplicates.py +++ /dev/null @@ -1,176 +0,0 @@ -#!/usr/bin/env python3 -# -# Copyright (c) 2017 Jon Turney -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in -# all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -# THE SOFTWARE. -# - -import argparse -import hashlib -import os -import re -import sys -import tarfile - -import xtarfile - -from . import common_constants - -# -# look for archives which are duplicated between x86 and x86_64 -# (these should probably be moved to noarch or src) -# - -# -# helper function to compute sha512 for a particular file -# (block_size should be some multiple of sha512 block size which can be -# efficiently read) -# - - -def sha512_file(f, block_size=256 * 128): - sha512 = hashlib.sha512() - - for chunk in iter(lambda: f.read(block_size), b''): - sha512.update(chunk) - - return sha512.hexdigest() - -# -# -# - - -class TarMemberInfo: - def __init__(self, info, sha512): - self.info = info - self.sha512 = sha512 - - -def read_tar(f): - result = {} - - try: - with xtarfile.open(f, mode='r') as t: - for m in t: - if m.isfile(): - f = t.extractfile(m) - sha512 = sha512_file(f) - else: - sha512 = None - result[m.name] = TarMemberInfo(m, sha512) - except tarfile.ReadError: - # if we can't read the tar archive, we should never consider it to have - # the same contents as another tar archive... - result[f] = None - - return result - -# -# -# - - -def compare_archives(f1, f2): - # for speed, first check that archives are of the same size - if os.path.getsize(f1) != os.path.getsize(f2): - return 'different archive size' - - # if they are both compressed empty files (rather than compressed empty tar - # archives), they are the same - if os.path.getsize(f1) <= 32: - return None - - t1 = read_tar(f1) - t2 = read_tar(f2) - - if t1.keys() != t2.keys(): - return 'different member lists' - - for m in t1: - # compare size of member - if t1[m].info.size != t2[m].info.size: - return 'different size for member %s' % m - - # compare type of member - if t1[m].info.type != t2[m].info.type: - return 'different type for member %s' % m - - # for files, compare hash of file content - if t1[m].info.isfile(): - if t1[m].sha512 != t2[m].sha512: - return 'different hash for member %s' % m - # for links, compare target - elif t1[m].info.islnk() or t1[m].info.issym(): - if t1[m].info.linkname != t2[m].info.linkname: - return 'different linkname for member %s' % m - - # permitted differences: mtime, mode, owner uid/gid - - return None - -# -# -# - - -def find_duplicates(args): - basedir = os.path.join(args.rel_area, common_constants.ARCHES[0], 'release') - - for (dirpath, _subdirs, files) in os.walk(basedir): - relpath = os.path.relpath(dirpath, basedir) - otherdir = os.path.join(args.rel_area, common_constants.ARCHES[1], 'release', relpath) - - for f in files: - # not an archive - if not re.match(r'^.*\.tar' + common_constants.PACKAGE_COMPRESSIONS_RE + r'$', f): - continue - - f1 = os.path.join(dirpath, f) - f2 = os.path.join(otherdir, f) - - if os.path.exists(f2): - difference = compare_archives(f1, f2) - if difference is None: - print(os.path.join('release', relpath, f)) - elif args.verbose: - print('%s: %s' % (os.path.join('release', relpath, f), difference)) - -# -# -# - - -def main(): - relarea_default = common_constants.FTP - - parser = argparse.ArgumentParser(description='Source package deduplicator') - parser.add_argument('--releasearea', action='store', metavar='DIR', help="release directory (default: " + relarea_default + ")", default=relarea_default, dest='rel_area') - parser.add_argument('-v', '--verbose', action='count', dest='verbose', help='verbose output') - (args) = parser.parse_args() - - return find_duplicates(args) - - -# -# -# - -if __name__ == "__main__": - sys.exit(main()) -- cgit v1.2.3