#!/usr/bin/env python3
#
## Licensed to the .NET Foundation under one or more agreements.
## The .NET Foundation licenses this file to you under the MIT license.
#
##
# Title               : jitrollingbuild.py
#
# Notes:
#
# Script to upload and manage rolling build clrjit and the rolling build storage location
#
################################################################################
################################################################################

import argparse
import locale
import os
import shutil
import sys
import tempfile
import urllib.request
import zipfile
import re

from coreclr_arguments import *

locale.setlocale(locale.LC_ALL, '')  # Use '' for auto, or force e.g. to 'en_US.UTF-8'

################################################################################
# Azure Storage information
################################################################################

az_account_name = "clrjit2"
az_container_name = "jitrollingbuild"
az_builds_root_folder = "builds"
az_blob_storage_account_uri = "https://" + az_account_name + ".blob.core.windows.net/"
az_blob_storage_container_uri = az_blob_storage_account_uri + az_container_name

################################################################################
# Argument Parser
################################################################################

description = """\
Script to upload and manage rolling build clrjit
"""

upload_description = """\
Upload clrjit to SuperPMI Azure storage.
"""

download_description = """\
Download clrjit from SuperPMI Azure storage.
"""

list_description = """\
List clrjit in SuperPMI Azure storage.
"""

host_os_help = "OS (windows, OSX, Linux). Default: current OS."

arch_help = "Architecture (x64, x86, arm, arm64). Default: current architecture."

build_type_help = "Build type (Debug, Checked, Release). Default: Checked."

git_hash_help = "git hash"

target_dir_help = "Directory to put the downloaded JIT."

skip_cleanup_help = "Skip intermediate file removal."

# Start of parser object creation.

parser = argparse.ArgumentParser(description=description)

subparsers = parser.add_subparsers(dest='mode', help="Command to invoke")

# Common parser for git_hash/arch/build_type/host_os arguments

common_parser = argparse.ArgumentParser(add_help=False)

common_parser.add_argument("-arch", help=arch_help)
common_parser.add_argument("-build_type", default="Checked", help=build_type_help)
common_parser.add_argument("-host_os", help=host_os_help)

# subparser for upload
upload_parser = subparsers.add_parser("upload", description=upload_description, parents=[common_parser])

upload_parser.add_argument("-git_hash", required=True, help=git_hash_help)
upload_parser.add_argument("-az_storage_key", help="Key for the clrjit Azure Storage location. Default: use the value of the CLRJIT_AZ_KEY environment variable.")
upload_parser.add_argument("--skip_cleanup", action="store_true", help=skip_cleanup_help)

# subparser for download
download_parser = subparsers.add_parser("download", description=download_description, parents=[common_parser])

download_parser.add_argument("-git_hash", required=True, help=git_hash_help)
download_parser.add_argument("-target_dir", required=True, help=target_dir_help)
download_parser.add_argument("--skip_cleanup", action="store_true", help=skip_cleanup_help)

# subparser for list
list_parser = subparsers.add_parser("list", description=list_description, parents=[common_parser])

list_parser.add_argument("-git_hash", help=git_hash_help)
list_parser.add_argument("--all", action="store_true", help="Show all JITs, not just those for the specified (or default) git hash, OS, architecture, and flavor")

################################################################################
# Helper classes
################################################################################


class TempDir:
    """ Class to create a temporary working directory, or use one that is passed as an argument.

        Use with: "with TempDir() as temp_dir" to change to that directory and then automatically
        change back to the original working directory afterwards and remove the temporary
        directory and its contents (unless the `skip_cleanup` command-line argument was given).
    """

    def __init__(self, path=None):
        """ Remember the directory to work in.

        Args:
            path (str) : existing directory to use, or None to create a fresh temporary directory
        """
        self.mydir = tempfile.mkdtemp() if path is None else path
        self.cwd = None

    def __enter__(self):
        """ Change into the working directory and return its path. """
        self.cwd = os.getcwd()
        os.chdir(self.mydir)
        return self.mydir

    def __exit__(self, exc_type, exc_val, exc_tb):
        """ Change back to the original directory and remove the working directory unless told not to. """
        os.chdir(self.cwd)
        # Note: we are using the global `args`, not coreclr_args. This works because
        # the `skip_cleanup` argument is not processed by CoreclrArguments, but is
        # just copied there.
        #
        # The global `args` only exists when this file is run as a script (it is set in the
        # `__main__` section), and only the upload/download sub-commands define `skip_cleanup`.
        # Look it up defensively and fall back to the default behavior (clean up) so this
        # class does not raise NameError/AttributeError in any other situation.
        skip_cleanup = getattr(globals().get("args"), "skip_cleanup", False)
        if not skip_cleanup:
            shutil.rmtree(self.mydir)


class ChangeDir:
    """ Class to temporarily change to a given directory. Use with "with".
    """

    def __init__(self, mydir):
        """ Remember the directory to change to.

        Args:
            mydir (str) : directory to make current for the duration of the "with" block
        """
        self.mydir = mydir
        self.cwd = None

    def __enter__(self):
        """ Save the current directory and change to the requested one. """
        self.cwd = os.getcwd()
        os.chdir(self.mydir)

    def __exit__(self, exc_type, exc_val, exc_tb):
        """ Restore the directory that was current when the "with" block was entered. """
        os.chdir(self.cwd)


def determine_jit_name(coreclr_args):
    """ Determine the file name of the primary JIT based on the host OS.

    Args:
        coreclr_args (CoreclrArguments): parsed args; only `host_os` is read

    Return:
        jit_name(str) : name of the jit for this os

    Raises:
        RuntimeError: if `host_os` is not one of "OSX", "Linux", or "windows"
    """

    jit_base_name = "clrjit"
    if coreclr_args.host_os == "OSX":
        return "lib" + jit_base_name + ".dylib"
    elif coreclr_args.host_os == "Linux":
        return "lib" + jit_base_name + ".so"
    elif coreclr_args.host_os == "windows":
        return jit_base_name + ".dll"
    else:
        raise RuntimeError("Unknown OS.")


def list_az_jits(filter_func=lambda unused: True, prefix_string = None):
    """ List the JITs in Azure Storage using REST api

    Args:
        filter_func (lambda: string -> bool): filter to apply to the list. The filter takes a URL and returns True if this URL is acceptable.
        prefix_string: Optional. Specifies a string prefix for the Azure Storage query.

    Returns:
        urls (list): list of URLs in Azure Storage that match the filter, or None if
                     a listing request failed (the error is printed).

    Notes:
        This method does not require installing the Azure Storage python package.
    """

    # This URI will return *all* the blobs, for all git_hash/OS/architecture/build_type combinations.
    # pass "prefix=foo/bar/..." to only show a subset. Or, we can filter later using string search.
    #
    # Note that there is a maximum number of results returned in one query of 5000. So we might need to
    # iterate. In that case, the XML result contains a `<NextMarker>` element like:
    #
    # <NextMarker>2!184!MDAwMDkyIWJ1aWxkcy8wMTZlYzI5OTAzMzkwMmY2ZTY4Yzg0YWMwYTNlYzkxN2Y5MzA0OTQ2L0xpbnV4L3g2NC9DaGVja2VkL2xpYmNscmppdF93aW5fYXJtNjRfeDY0LnNvITAwMDAyOCE5OTk5LTEyLTMxVDIzOjU5OjU5Ljk5OTk5OTlaIQ--</NextMarker>
    #
    # which we need to pass to the REST API with `marker=...`.

    urls = []

    list_az_container_uri_root = az_blob_storage_container_uri + "?restype=container&comp=list&prefix=" + az_builds_root_folder + "/"
    if prefix_string:
        list_az_container_uri_root += prefix_string

    marker = ""

    while True:
        list_az_container_uri = list_az_container_uri_root + marker

        try:
            # Use a context manager so the HTTP response is always closed.
            with urllib.request.urlopen(list_az_container_uri) as response:
                contents = response.read().decode('utf-8')
        except Exception as exception:
            print("Didn't find any collections using {}".format(list_az_container_uri))
            print("  Error: {}".format(exception))
            return None

        # Contents is an XML file with contents roughly like the following (element names
        # reconstructed from the Azure Storage "List Blobs" response format; only <Url> and
        # <NextMarker> matter to this code):
        #
        # <EnumerationResults ContainerName="https://clrjit2.blob.core.windows.net/jitrollingbuild">
        #   <Prefix>builds/</Prefix>
        #   <Blobs>
        #     <Blob>
        #       <Name>builds/755f01659f03196487ec41225de8956911f8049b/Linux/x64/Checked/libclrjit.so</Name>
        #       <Url>https://clrjit2.blob.core.windows.net/jitrollingbuild/builds/755f01659f03196487ec41225de8956911f8049b/Linux/x64/Checked/libclrjit.so</Url>
        #       <Properties>
        #         ...
        #       </Properties>
        #     </Blob>
        #     <Blob>
        #       <Name>builds/755f01659f03196487ec41225de8956911f8049b/OSX/x64/Checked/libclrjit.dylib</Name>
        #       <Url>https://clrjit2.blob.core.windows.net/jitrollingbuild/builds/755f01659f03196487ec41225de8956911f8049b/OSX/x64/Checked/libclrjit.dylib</Url>
        #       <Properties>
        #         ...
        #       </Properties>
        #     </Blob>
        #     ... etc. ...
        #   </Blobs>
        #   <NextMarker />
        # </EnumerationResults>
        #
        # We just want to extract the <Url> entries. We could probably use an XML parsing package, but we just
        # use regular expressions.

        for url in re.findall(r'<Url>(.*?)</Url>', contents, re.DOTALL):
            url = url.strip()
            if filter_func(url):
                urls.append(url)

        # Look for a continuation marker. A non-empty <NextMarker> means there are more results
        # to fetch; an empty / self-closing element (or none at all) means we are done.
        # `re.search` is used so the match does not depend on the element being on the first line.
        re_match = re.search(r'<NextMarker>(.+?)</NextMarker>', contents)
        if re_match:
            marker = "&marker=" + re_match.group(1)
        else:
            break

    return urls


def upload_command(coreclr_args):
    """ Upload the JIT

    Finds the primary JIT and any cross-compilation JITs in the product directory and
    uploads them to <builds root>/<git hash>/<OS>/<arch>/<build type>/ in Azure Storage.

    Args:
        coreclr_args (CoreclrArguments): parsed args

    Raises:
        RuntimeError: if the primary JIT is missing, the host OS is unknown, or the
                      azure-storage-blob package is not installed
    """

    print("JIT upload")

    def upload_blob(file, blob_name):
        """ Upload one local file as `blob_name`, replacing any existing blob of that name. """
        blob_client = blob_service_client.get_blob_client(container=az_container_name, blob=blob_name)

        # Check if the blob already exists, and delete it if it does, before uploading / replacing it.
        try:
            blob_client.get_blob_properties()
            # If no exception, then the blob already exists. Delete it!
            print("Warning: replacing existing blob!")
            blob_client.delete_blob()
        except Exception:
            # Blob doesn't exist already; that's good
            pass

        with open(file, "rb") as data:
            blob_client.upload_blob(data)

    # 1. Find all the JIT builds in the product directory
    # 2. Upload them
    #
    # We could also upload debug info, but it's not clear it's needed for most purposes, and it is very big:
    # it increases the upload size from about 190MB to over 900MB for each roll.
    #
    # For reference, the JIT debug info is found:
    #    a. For Windows, in the PDB subdirectory, e.g. PDB\clrjit.pdb
    #    b. For Linux .dbg files, and Mac .dwarf files, in the same directory as the jit, e.g., libcoreclr.so.dbg

    # Target directory: <root>/git-hash/OS/architecture/build-flavor/
    # Note that build-flavor will probably always be Checked.

    files = []

    # First, find the primary JIT that we expect to find.
    jit_name = determine_jit_name(coreclr_args)
    jit_path = os.path.join(coreclr_args.product_location, jit_name)
    if not os.path.isfile(jit_path):
        print("Error: Couldn't find JIT at {}".format(jit_path))
        raise RuntimeError("Missing JIT")

    files.append(jit_path)

    # Next, look for any and all cross-compilation JITs. These are named, e.g.:
    #   clrjit_unix_x64_x64.dll
    #   clrjit_win_arm_x64.dll
    #   clrjit_win_arm64_x64.dll
    # and so on, and live in the same product directory as the primary JIT.
    #
    # Note that the expression below explicitly filters out the primary JIT since we added that above.
    # We handle the primary JIT specially so we can error if it is missing. For the cross-compilation
    # JITs, we don't bother trying to ensure that all the ones we might expect are actually there.
    #
    # We don't do a recursive walk because the JIT is also copied to the "sharedFramework" subdirectory,
    # so we don't want to pick that up.

    if coreclr_args.host_os == "OSX":
        allowed_extensions = [ ".dylib" ] # Add .dwarf for debug info
    elif coreclr_args.host_os == "Linux":
        allowed_extensions = [ ".so" ] # Add .dbg for debug info
    elif coreclr_args.host_os == "windows":
        allowed_extensions = [ ".dll" ]
    else:
        raise RuntimeError("Unknown OS.")

    cross_jit_paths = [os.path.join(coreclr_args.product_location, item)
                       for item in os.listdir(coreclr_args.product_location)
                       if re.match(r'.*clrjit.*', item) and item != jit_name and item.endswith(tuple(allowed_extensions))]
    files += cross_jit_paths

    # On Windows, grab the PDB files from a sub-directory.
    # if coreclr_args.host_os == "windows":
    #    pdb_dir = os.path.join(coreclr_args.product_location, "PDB")
    #    if os.path.isdir(pdb_dir):
    #        pdb_paths = [os.path.join(pdb_dir, item) for item in os.listdir(pdb_dir) if re.match(r'.*clrjit.*', item)]
    #        files += pdb_paths

    print("Uploading:")
    for item in files:
        print("  {}".format(item))

    # Only a missing package should produce the install hint; anything else
    # (e.g. KeyboardInterrupt) must propagate unchanged.
    try:
        from azure.storage.blob import BlobServiceClient
    except ImportError:
        print("Please install:")
        print("  pip install azure-storage-blob")
        print("See also https://docs.microsoft.com/en-us/azure/storage/blobs/storage-quickstart-blobs-python")
        raise RuntimeError("Missing azure storage package.")

    blob_service_client = BlobServiceClient(account_url=az_blob_storage_account_uri, credential=coreclr_args.az_storage_key)
    blob_folder_name = "{}/{}/{}/{}/{}".format(az_builds_root_folder, coreclr_args.git_hash, coreclr_args.host_os, coreclr_args.arch, coreclr_args.build_type)

    total_bytes_uploaded = 0

    # Should we compress the JIT on upload? It would save space, but it makes it slightly more complicated to use
    # because you can't just "wget" or otherwise download the file and use it immediately -- you need to unzip first.
    # So for now, don't compress it.
    compress_jit = False

    with TempDir() as temp_location:
        for file in files:
            if compress_jit:
                # Zip compress the file we will upload
                zip_name = os.path.basename(file) + ".zip"
                zip_path = os.path.join(temp_location, zip_name)
                print("Compress {} -> {}".format(file, zip_path))
                with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zip_file:
                    zip_file.write(file, os.path.basename(file))

                file_stat_result = os.stat(file)
                zip_stat_result = os.stat(zip_path)
                print("Compressed {:n} to {:n} bytes".format(file_stat_result.st_size, zip_stat_result.st_size))
                total_bytes_uploaded += zip_stat_result.st_size

                blob_name = "{}/{}".format(blob_folder_name, zip_name)
                print("Uploading: {} ({}) -> {}".format(file, zip_path, az_blob_storage_container_uri + "/" + blob_name))
                upload_blob(zip_path, blob_name)
            else:
                file_stat_result = os.stat(file)
                total_bytes_uploaded += file_stat_result.st_size
                file_name = os.path.basename(file)
                blob_name = "{}/{}".format(blob_folder_name, file_name)
                print("Uploading: {} -> {}".format(file, az_blob_storage_container_uri + "/" + blob_name))
                upload_blob(file, blob_name)

    print("Uploaded {:n} bytes".format(total_bytes_uploaded))
    print("Finished JIT upload")


def download_urls(urls, target_dir):
    """ Download a set of files, specified as URLs, to a target directory.
        If the URLs are to .ZIP files, then uncompress them and copy all contents to the target directory.

    Args:
        urls (list): the URLs to download
        target_dir (str): target directory where files are copied. Directory must exist

    Returns:
        list of local filenames of downloaded files
    """

    print("Downloading:")
    for url in urls:
        print("  {}".format(url))

    local_files = []

    # In case we'll need a temp directory for ZIP file processing, create it first.
    with TempDir() as temp_location:
        for url in urls:
            item_name = url.split("/")[-1]

            if url.lower().endswith(".zip"):
                # Delete everything in the temp_location (from previous iterations of this loop, so previous URL downloads).
                temp_location_items = [os.path.join(temp_location, item) for item in os.listdir(temp_location)]
                for item in temp_location_items:
                    if os.path.isdir(item):
                        shutil.rmtree(item)
                    else:
                        os.remove(item)

                download_path = os.path.join(temp_location, item_name)
                print("Download: {} -> {}".format(url, download_path))
                urllib.request.urlretrieve(url, download_path)

                print("Uncompress {}".format(download_path))
                with zipfile.ZipFile(download_path, "r") as file_handle:
                    file_handle.extractall(temp_location)

                # Copy everything that was extracted to the target directory.
                items = [ os.path.join(temp_location, item) for item in os.listdir(temp_location) if not item.endswith(".zip") ]
                for item in items:
                    target_path = os.path.join(target_dir, os.path.basename(item))
                    print("Copy {} -> {}".format(item, target_path))
                    shutil.copy2(item, target_dir)
                    local_files.append(target_path)
            else:
                # Not a zip file; download directly to target directory
                download_path = os.path.join(target_dir, item_name)
                print("Download: {} -> {}".format(url, download_path))
                urllib.request.urlretrieve(url, download_path)
                local_files.append(download_path)

    return local_files


def get_jit_urls(coreclr_args, find_all=False):
    """ Helper method: collect a list of URLs for all the JIT files to download or list.

    Args:
        coreclr_args (CoreclrArguments): parsed args
        find_all (bool): True to show all, or False to filter based on coreclr_args

    Returns:
        list of matching URLs, or None if the Azure Storage listing failed
    """

    blob_filter_string = "{}/{}/{}/{}".format(coreclr_args.git_hash, coreclr_args.host_os, coreclr_args.arch, coreclr_args.build_type)
    blob_prefix_filter = "{}/{}/{}".format(az_blob_storage_container_uri, az_builds_root_folder, blob_filter_string).lower()

    # Determine if a URL in Azure Storage should be allowed. The URL looks like:
    #   https://clrjit2.blob.core.windows.net/jitrollingbuild/builds/git_hash/Linux/x64/Checked/clrjit.dll
    # Filter to just the current git_hash, OS, architecture, and build_flavor.
    # If "find_all" is True, then no filtering happens: everything is returned.
    # The comparison is done on lower-cased strings, i.e. it is case-insensitive.
    def filter_jits(url):
        url = url.lower()
        return find_all or url.startswith(blob_prefix_filter)

    return list_az_jits(filter_jits, None if find_all else blob_filter_string)


def download_command(coreclr_args):
    """ Download the JIT

    Args:
        coreclr_args (CoreclrArguments): parsed args
    """

    urls = get_jit_urls(coreclr_args, find_all=False)
    if urls is None:
        return

    if not urls:
        # Make it obvious that nothing matched instead of silently doing nothing.
        print("No JIT files found to download")
        return

    download_urls(urls, coreclr_args.target_dir)


def list_command(coreclr_args):
    """ List the JITs in Azure Storage

    Args:
        coreclr_args (CoreclrArguments) : parsed args
    """

    urls = get_jit_urls(coreclr_args, find_all=coreclr_args.all)
    if urls is None:
        return

    count = len(urls)

    if coreclr_args.all:
        print("{} JIT files".format(count))
    else:
        blob_filter_string = "{}/{}/{}/{}".format(coreclr_args.git_hash, coreclr_args.host_os, coreclr_args.arch, coreclr_args.build_type)
        print("{} JIT files for {}".format(count, blob_filter_string))
    print("")

    for url in urls:
        print("{}".format(url))

    print("")


def setup_args(args):
    """ Setup the args for this script to use.

    Args:
        args (ArgParse): args parsed by arg parser

    Returns:
        args (CoreclrArguments)

    Raises:
        RuntimeError: in "upload" mode if the built product location is not a directory,
                      or in "download" mode if the target directory does not exist
    """
    coreclr_args = CoreclrArguments(args, require_built_core_root=False, require_built_product_dir=False, require_built_test_dir=False, default_build_type="Checked")

    coreclr_args.verify(args,
                        "mode",  # "mode" is from the `parser.add_subparsers(dest='mode')` call
                        lambda unused: True,
                        "Unable to set mode")

    if coreclr_args.mode == "upload":

        coreclr_args.verify(args,
                            "git_hash",
                            lambda unused: True,
                            "Unable to set git_hash")

        # Fall back to the CLRJIT_AZ_KEY environment variable when no key was given on the command line.
        coreclr_args.verify(args,
                            "az_storage_key",
                            lambda item: item is not None,
                            "Specify az_storage_key or set environment variable CLRJIT_AZ_KEY to the key to use.",
                            modify_arg=lambda arg: os.environ["CLRJIT_AZ_KEY"] if arg is None and "CLRJIT_AZ_KEY" in os.environ else arg)

        coreclr_args.verify(args,
                            "skip_cleanup",
                            lambda unused: True,
                            "Unable to set skip_cleanup")

        if not os.path.isdir(coreclr_args.product_location):
            print("Built product location could not be determined")
            raise RuntimeError("Error")

    elif coreclr_args.mode == "download":

        coreclr_args.verify(args,
                            "git_hash",
                            lambda unused: True,
                            "Unable to set git_hash")

        coreclr_args.verify(args,
                            "target_dir",
                            lambda unused: True,
                            "Unable to set target_dir")

        coreclr_args.verify(args,
                            "skip_cleanup",
                            lambda unused: True,
                            "Unable to set skip_cleanup")

        if not os.path.isdir(coreclr_args.target_dir):
            # The option is spelled with a single dash (see download_parser above).
            print("-target_dir directory does not exist")
            raise RuntimeError("Error")

    elif coreclr_args.mode == "list":

        coreclr_args.verify(args,
                            "git_hash",
                            lambda unused: True,
                            "Unable to set git_hash")

        coreclr_args.verify(args,
                            "all",
                            lambda unused: True,
                            "Unable to set all")

    return coreclr_args

################################################################################
# main
################################################################################


def main(args):
    """ Main method

    Args:
        args (ArgParse): args parsed by arg parser

    Returns:
        process exit code: 0 on success, 1 if the Python version is too old

    Raises:
        NotImplementedError: if the mode is not one of "upload", "download", or "list"
    """

    if sys.version_info.major < 3:
        print("Please install python 3 or greater")
        return 1

    coreclr_args = setup_args(args)

    if coreclr_args.mode == "upload":
        upload_command(coreclr_args)

    elif coreclr_args.mode == "download":
        download_command(coreclr_args)

    elif coreclr_args.mode == "list":
        list_command(coreclr_args)

    else:
        raise NotImplementedError(coreclr_args.mode)

    # Failures in the commands above are reported by raising, so reaching here means success.
    return 0

################################################################################
# __main__
################################################################################


if __name__ == "__main__":
    # `args` is intentionally a module-level global: TempDir.__exit__ reads `args.skip_cleanup`.
    args = parser.parse_args()
    sys.exit(main(args))