#!/usr/bin/env python3
#
# Copyright (c) 2015 Jon Turney
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
#

#
# write package listing HTML files
#
# - build a list of all files under HTDOCS/packages/<arch>
# - for each package in the package database
# -- create a .htaccess file in the package directory, if not present
# -- for each tar file
# --- if a package listing HTML file doesn't already exist
# ---- write a HTML package listing file listing the tar file contents
# -- write a summary file, if the set of versions changed
# - write packages.inc, the list of packages
# - remove any .htaccess or listing files for which there was no package
# - remove any directories which are now empty
#
# note that the directory hierarchy of (noarch|arch)/package/subpackages is
# flattened in the package listing to just the package name
#

import argparse
import glob
import html
import logging
import math
import os
import re
import string
import sys
import textwrap
import time
from typing import NamedTuple

import xtarfile

from . import common_constants
from . import maintainers
from . import package
from . import utils
from .version import SetupVersion


#
# get sdesc for a package
#

def sdesc(po, bv):
    header = po.version_hints[bv]['sdesc']
    header = header.strip('"')
    return html.escape(header, quote=False)


#
# ditto for ldesc
#

def ldesc(po, bv):
    if 'ldesc' in po.version_hints[bv]:
        header = po.version_hints[bv]['ldesc']
    else:
        return sdesc(po, bv)

    header = header.strip('"')
    # escape html entities
    header = html.escape(header, quote=False)
    header = header.replace('\n\n', '\n<p>\n')
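    # (the replace above turns a blank line into a paragraph break,
    # e.g. (illustrative) 'one\n\ntwo' becomes 'one\n<p>\ntwo')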
    # try to recognize things which look like bullet points
    header = re.sub(r'\n(\s*[*-]\s)', r'<br>\n\1', header)
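    # (e.g. (illustrative) '\n- first\n- second' becomes
    # '<br>\n- first<br>\n- second')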
    # linkify things which look like hyperlinks
    header = re.sub(r'http(s|)://[^\s\)]*', r'<a href="\g<0>">\g<0></a>', header)

    return header


#
# try hard to find a package object for package p
#

def arch_package(packages, p):
    for arch in common_constants.ARCHES:
        if p in packages[arch]:
            return packages[arch][p]
    return None


#
# build a dict of the arches which contain package p
#

def arch_packages(packages, p):
    result = {}
    for arch in common_constants.ARCHES:
        if p in packages[arch]:
            result[arch] = packages[arch][p]
    return result


#
# ensure a directory exists
#

def ensure_dir_exists(args, path):
    if not args.dryrun:
        utils.makedirs(path)
        os.chmod(path, 0o755)


#
# format a unix epoch time (UTC)
#

def tsformat(ts):
    return time.strftime('%Y-%m-%d %H:%M', time.gmtime(ts))


#
# write the summary page for each package, and the package list fragments
#

def update_package_listings(args, packages):
    package_list = set()
    update_summary = set()
    for arch in packages:
        update_summary.update(write_arch_listing(args, packages[arch], arch))
        package_list.update(packages[arch])

    summaries = os.path.join(args.htdocs, 'summary')
    ensure_dir_exists(args, summaries)

    pkg_maintainers = maintainers.pkg_list(args.pkglist)

    toremove = glob.glob(os.path.join(summaries, '*'))

    def linkify_package(pkg):
        p = re.sub(r'(.*)\s+\(.*\)', r'\1', pkg)
        if p in package_list:
            pn = arch_package(packages, p).orig_name
            text = re.sub(re.escape(p), pn, pkg)
            return '<a href="%s.html">%s</a>' % (p, text)
        logging.debug('package linkification failed for %s' % p)
        return p

    for p in package_list:
        #
        # write package summary
        #
        # (these exist in a separate directory to prevent their contents being
        # searched by the package search script)
        #
        summary = os.path.join(summaries, p + '.html')

        # this file should exist, so remove from the toremove list
        if summary in toremove:
            toremove.remove(summary)

        # if listing files were added or removed, or it doesn't already exist,
        # or force, update the summary
        if p in update_summary or not os.path.exists(summary) or args.force:
            if not args.dryrun:
                with utils.open_amifc(summary) as f:
                    os.fchmod(f.fileno(), 0o755)

                    pos = arch_packages(packages, p)
                    if not pos:
                        continue

                    po = next(iter(pos.values()))
                    bv = po.best_version

                    if po.kind == package.Kind.source:
                        pn = po.orig_name
                        title = "Cygwin Package Summary for %s (source)" % pn
                        kind = "Source Package"
                    else:
                        pn = p
                        title = "Cygwin Package Summary for %s" % p
                        kind = "Package"

                    print(textwrap.dedent('''\
                    <!DOCTYPE html>
                    <html>
                    <head>
                    <title>%s</title>
                    </head>
                    <body>
                    <h1>%s: %s</h1>''' % (title, kind, pn)), file=f)
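
                    # assemble a table of details: each value is either taken
                    # from the hints of the best version, or (is_attr) from an
                    # attribute of the package object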
                    details_table = {}
                    details_table['summary'] = sdesc(po, bv)
                    details_table['description'] = ldesc(po, bv)
                    details_table['categories'] = po.version_hints[bv].get('category', '')

                    class PackageData(NamedTuple):
                        is_attr: bool = False
                        summarize_limit: int = 0

                    if po.kind == package.Kind.source:
                        details = {'build-depends': PackageData()}
                    else:
                        details = {
                            'depends': PackageData(),
                            'obsoletes': PackageData(),
                            'obsoleted_by': PackageData(is_attr=True),
                            'provides': PackageData(),
                            'conflicts': PackageData(),
                            'rdepends': PackageData(is_attr=True, summarize_limit=10),
                            'build_rdepends': PackageData(is_attr=True, summarize_limit=10),
                        }

                    for key in details:
                        # make the union of the package list for this detail
                        # across arches, and then annotate any items which
                        # don't appear for all arches
                        value = {}
                        values = set()
                        for arch in pos:
                            if details[key].is_attr:
                                value[arch] = getattr(pos[arch], key, set())
                            else:
                                t = pos[arch].version_hints[pos[arch].best_version].get(key, None)
                                if t:
                                    value[arch] = set(t.split(', '))
                                else:
                                    value[arch] = set()
                            values.update(value[arch])

                        if values:
                            detail = []
                            for detail_pkg in sorted(values):
                                if all(detail_pkg in value[arch] for arch in pos):
                                    detail.append(linkify_package(detail_pkg))
                                else:
                                    detail.append(linkify_package(detail_pkg) + ' (%s)' % (','.join([arch for arch in pos if detail_pkg in value[arch]])))

                            limit = details[key].summarize_limit
                            if limit and len(detail) > limit:
                                details_table[key] = '<details><summary>(%s)</summary>%s</details>' % (len(detail), ', '.join(detail))
                            else:
                                details_table[key] = ', '.join(detail)

                    if po.kind == package.Kind.source:
                        es = p

                        install_packages = set()
                        for arch in pos:
                            install_packages.update(pos[arch].is_used_by)
                        details_table['install package(s)'] = ', '.join([linkify_package(p) for p in sorted(install_packages)])

                        homepage = po.version_hints[po.best_version].get('homepage', None)
                        if homepage:
                            details_table['homepage'] = '<a href="%s">%s</a>' % (homepage, homepage)

                        lic = po.version_hints[po.best_version].get('license', None)
                        if lic:
                            details_table['license'] = '%s (<a href="https://spdx.org/licenses/">SPDX</a>)' % (lic)
                    else:
                        es = po.srcpackage(bv)
                        details_table['source package'] = linkify_package(es)

                    es_po = arch_package(packages, es)
                    if not es_po:
                        es_po = po

                    m_pn = es_po.orig_name
                    if m_pn not in pkg_maintainers:
                        m = None
                        pkg_groups = None
                    else:
                        if pkg_maintainers[m_pn].is_orphaned():
                            m = 'ORPHANED'
                        else:
                            m = ', '.join(sorted(pkg_maintainers[m_pn].maintainers()))
                        pkg_groups = pkg_maintainers[m_pn].groups()

                    if m:
                        details_table['maintainer(s)'] = m + textwrap.dedent('''
                        (Use <a href="https://cygwin.com/lists.html">the mailing list</a> to report bugs or ask questions.
                        Do not contact the maintainer(s) directly.)''')

                    if pkg_groups:
                        details_table['groups'] = ','.join(pkg_groups)

                    if po.kind == package.Kind.source:
                        if args.repodir:
                            repo = os.path.join(args.repodir, '%s.git' % pn)
                            if os.path.exists(repo):
                                repo_browse_url = '/cgit/cygwin-packages/%s/' % pn
                                details_table['packaging repository'] = '<a href="%s">%s.git</a>' % (repo_browse_url, pn)

                    # output details table
                    print('<table class="pkgdetails">', file=f)
                    for d, v in details_table.items():
                        if not v.startswith('<p>'):
                            v = '<p>' + v + '</p>'
                        print('<tr><td><p>%s:</p></td><td>%s</td></tr>' % (d, v), file=f)
                    print('</table>', file=f)

                    # output per-arch package versions table
                    print('<table class="pkgtable">', file=f)

                    print(textwrap.dedent('''\
                    </table>
                    </body>
                    </html>'''), file=f)
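
    # remove any summary pages for which there is no longer a package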
    for r in toremove:
        logging.debug('rm %s' % r)
        if not args.dryrun:
            os.unlink(r)

    write_packages_inc(args, packages, 'packages.inc', package.Kind.binary, 'package_list.html')
    write_packages_inc(args, packages, 'src_packages.inc', package.Kind.source, 'src_package_list.html')


#
# write package index page fragment for inclusion
#

def write_packages_inc(args, packages, name, kind, includer):
    packages_inc = os.path.join(args.htdocs, name)
    if not args.dryrun:
        def touch_including(changed):
            if changed:
                # touch the including file for the benefit of 'XBitHack full'
                package_list = os.path.join(args.htdocs, includer)
                if os.path.exists(package_list):
                    logging.info("touching %s for the benefit of 'XBitHack full'" % (package_list))
                    utils.touch(package_list)

        with utils.open_amifc(packages_inc, cb=touch_including) as index:
            os.fchmod(index.fileno(), 0o644)

            # This list contains all packages in any arch. Source packages
            # appear under their original package name.
            package_list = {}
            for arch in packages:
                for p in packages[arch]:
                    if p.endswith('-debuginfo'):
                        continue
                    if packages[arch][p].not_for_output:
                        continue
                    if packages[arch][p].kind == kind:
                        package_list[packages[arch][p].orig_name] = p

            jumplist = set()
            for k in package_list:
                p = package_list[k]
                c = p[0].lower()
                if c in string.ascii_lowercase:
                    jumplist.add(c)

            print('<p>', file=index)
            print('%d packages : ' % len(package_list), file=index)
            print(' - \n'.join(['<a href="#%s">%s</a>' % (c, c) for c in sorted(jumplist)]), file=index)
            print('</p>', file=index)
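
            # the jump list printed above renders as, e.g. (illustrative):
            #   1234 packages : a - b - c - ... - z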
            print('<table class="pkglist">', file=index)

            first = ' class="pkgname"'
            jump = ''
            for k in sorted(package_list, key=package.sort_key):
                p = package_list[k]
                po = arch_package(packages, p)
                if not po:
                    continue

                bv = po.best_version
                header = sdesc(po, bv)

                if po.kind == package.Kind.source:
                    pn = po.orig_name
                    if 'source' not in header:
                        header += ' (source)'
                else:
                    pn = p

                anchor = ''
                if jump != p[0].lower():
                    jump = p[0].lower()
                    if jump in jumplist:
                        anchor = ' id="%s"' % (jump)

                print('<tr%s><td%s><a href="summary/%s.html">%s</a></td><td>%s</td></tr>' % (anchor, first, p, pn, header), file=index)
                first = ''

            print('</table>', file=index)


def write_arch_listing(args, packages, arch):
    update_summary = set()
    base = os.path.join(args.htdocs, arch)
    ensure_dir_exists(args, base)

    #
    # write base directory .htaccess, if needed
    #
    # (force trying to access the base directory to redirect to the package
    # list page, as having the server index this directory containing lots of
    # subdirectories makes this URL very expensive to serve if someone
    # stumbles onto it by accident)
    #
    htaccess = os.path.join(base, '.htaccess')
    if not os.path.exists(htaccess) or args.force:
        if not args.dryrun:
            with utils.open_amifc(htaccess) as f:
                print('Redirect temp /packages/%s/index.html https://cygwin.com/packages/package_list.html' % (arch), file=f)

    toremove = glob.glob(os.path.join(base, '*', '*')) + glob.glob(os.path.join(base, '*', '.*'))

    for p in packages:
        dirpath = os.path.join(base, p)
        ensure_dir_exists(args, dirpath)

        #
        # write .htaccess if needed
        #
        htaccess = os.path.join(dirpath, '.htaccess')
        if not os.path.exists(htaccess) or args.force:
            if not args.dryrun:
                with utils.open_amifc(htaccess) as f:
                    # We used to allow access to the directory listing as a
                    # crude way of listing the versions of the package
                    # available for which file lists were available. Redirect
                    # that index page to the summary page, which now has that
                    # information (and more).
                    print('RedirectMatch temp /packages/%s/%s/$ /packages/summary/%s.html' % (arch, p, p), file=f)

                    # listing files don't have the extension, but are html
                    print('ForceType text/html', file=f)

        # this file should exist, so remove from the toremove list
        if htaccess in toremove:
            toremove.remove(htaccess)

        #
        # for each tarfile, write tarfile listing
        #
        if os.path.exists(dirpath):
            listings = os.listdir(dirpath)
            listings.remove('.htaccess')
        else:
            listings = []

        for to in packages[p].tarfiles.values():
            tn = to.repopath.fn
            fver = re.sub(r'\.tar.*$', '', tn)
            listing = os.path.join(dirpath, fver)

            # ... if it doesn't already exist, or --force --force
            if not os.path.exists(listing) or (args.force > 1):
                if not args.dryrun:
                    # versions are being added, so summary needs updating
                    update_summary.add(p)

                    with utils.open_amifc(listing) as f:
                        bv = packages[p].best_version
                        desc = sdesc(packages[p], bv)

                        if fver.endswith('-src'):
                            desc = desc + " (source)"

                        print(textwrap.dedent('''\
                        <!DOCTYPE html>
                        <html>
                        <head>
                        <title>%s: %s</title>
                        </head>
                        <body>
                        <h1><a href="/packages/summary/%s.html">%s</a>: %s</h1>
                        <pre>''' % (p, desc, p, p, desc)), file=f)

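                        # list the contents of the tarfile, one member per
                        # line, e.g. (illustrative):
                        #     2015-02-18 10:30        5368 usr/bin/foo.exe
                        #     2015-02-18 10:30           0 usr/share/doc/foo/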
                        tf = to.repopath.abspath(args.rel_area)
                        if not os.path.exists(tf):
                            # this shouldn't happen with a full mirror
                            logging.error("tarfile %s not found" % (tf))
                        elif os.path.getsize(tf) <= 32:
                            # compressed empty files aren't a valid tar file,
                            # but we can just ignore them
                            pass
                        else:
                            try:
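                                # xtarfile wraps the stdlib tarfile module to
                                # also handle compression formats it doesn't
                                # support natively (e.g. zstd)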
                                with xtarfile.open(tf, mode='r') as a:
                                    for i in a:
                                        print('    %-16s%12d %s' % (tsformat(i.mtime), i.size, i.name), file=f, end='')
                                        if i.isdir():
                                            print('/', file=f, end='')
                                        if i.issym() or i.islnk():
                                            print(' -> %s' % i.linkname, file=f, end='')
                                        print('', file=f)
                            except Exception as e:
                                print('package is corrupted', file=f)
                                logging.error("exception %s while reading %s" % (type(e).__name__, tf))
                                logging.debug('', exc_info=True)

                        print(textwrap.dedent('''\
                        </pre>
                        </body>
                        </html>'''), file=f)
            else:
                logging.log(5, 'not writing %s, already exists' % listing)

            # this file should exist, so remove from the toremove list
            if listing in toremove:
                toremove.remove(listing)

            if fver in listings:
                listings.remove(fver)

        # some versions remain on the toremove list, and will be removed, so
        # the summary needs updating
        if listings:
            update_summary.add(p)

    #
    # remove any remaining files for which there was no corresponding package
    #
    for r in toremove:
        logging.debug('rm %s' % r)
        if not args.dryrun:
            os.unlink(r)

            #
            # remove any directories which are now empty
            #
            dirpath = os.path.dirname(r)
            if len(os.listdir(dirpath)) == 0:
                logging.debug('rmdir %s' % dirpath)
                os.rmdir(dirpath)

    return update_summary


if __name__ == "__main__":
    htdocs_default = os.path.join(common_constants.HTDOCS, 'packages')
    relarea_default = common_constants.FTP
    pkglist_default = common_constants.PKGMAINT

    parser = argparse.ArgumentParser(description='Write HTML package listings')
    parser.add_argument('--force', action='count', default=0, help="overwrite existing files (repeat to also regenerate existing listing files)")
    parser.add_argument('--htdocs', action='store', metavar='DIR', help="htdocs output directory (default: " + htdocs_default + ")", default=htdocs_default)
    parser.add_argument('--pkglist', action='store', metavar='FILE', help="package maintainer list (default: " + pkglist_default + ")", default=pkglist_default)
    parser.add_argument('--releasearea', action='store', metavar='DIR', help="release directory (default: " + relarea_default + ")", default=relarea_default, dest='rel_area')
    parser.add_argument('--repodir', action='store', metavar='DIR', help="directory containing package repositories")
    parser.add_argument('-n', '--dry-run', action='store_true', dest='dryrun', help="don't do anything")
    parser.add_argument('-v', '--verbose', action='count', dest='verbose', help='verbose output')
    (args) = parser.parse_args()

    if args.verbose:
        logging.getLogger().setLevel(logging.INFO)

    logging.basicConfig(format=os.path.basename(sys.argv[0]) + ': %(message)s')

    packages = {}
    for arch in common_constants.ARCHES:
        packages[arch], _ = package.read_packages(args.rel_area, arch)

    update_package_listings(args, packages)
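
# example invocation (illustrative paths; assumes this module is installed as
# part of the 'calm' package):
#   python3 -m calm.pkg2html --releasearea /var/ftp/pub/cygwin --htdocs /srv/www/packages -v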