diff options
author | Roman Grundkiewicz <rogrundk@microsoft.com> | 2021-08-06 11:02:18 +0300 |
---|---|---|
committer | Roman Grundkiewicz <rogrundk@microsoft.com> | 2021-08-06 11:02:18 +0300 |
commit | e025bfb07c471c7505d0cc38e718f0f708783539 (patch) | |
tree | 56b6cc25c906522177795300fcc1d4b7745dc3c8 | |
parent | d124ca9f5b14458fdd110a96aee06e42178f4b3e (diff) |
Merged PR 20070: Run regression tests in Azure Pipelines
The changes proposed in this pull request:
* Added regression testing with internal models into Azure Pipelines on both Windows and Ubuntu
* Created https://machinetranslation.visualstudio.com/Marian/_git/marian-prod-tests (more tests will be added over time)
* Made regression test outputs (all `.log`, `.out`, `.diff` files) available for inspection as a downloadable artifact.
* Made `--build-info` option available in CMake-based Windows builds
Warning: I tried to handle multiple cases, but some regression tests may occasionally fail, especially tests using avx2 or avx512 models, because the outputs are system/CPU dependent. I think it is better to merge this now, monitor the stability of the tests, and add expected output variations where necessary, improving the coverage and stability of the regression tests over time.
-rw-r--r-- | CMakeLists.txt | 3 | ||||
-rw-r--r-- | azure-pipelines.yml | 288 | ||||
-rw-r--r-- | src/common/config_parser.cpp | 8 |
3 files changed, 276 insertions, 23 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt index 870fb70b..4e6f24c7 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -624,6 +624,9 @@ execute_process(COMMAND rm ${CMAKE_CURRENT_SOURCE_DIR}/src/common/build_info.cpp OUTPUT_QUIET ERROR_QUIET) configure_file(${CMAKE_CURRENT_SOURCE_DIR}/src/common/build_info.cpp.in ${CMAKE_CURRENT_BINARY_DIR}/src/common/build_info.cpp @ONLY) +# to be able to check if this is a CMake-based compilation, which always adds +# build-info option, even on Windows. +add_definitions(-DBUILD_INFO_AVAILABLE=1) # Compile source files include_directories(${marian_SOURCE_DIR}/src) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index a1198818..4f7ce02d 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -15,25 +15,37 @@ pool: name: Azure Pipelines variables: - BOOST_ROOT_WINDOWS: "C:/hostedtoolcache/windows/Boost/1.72.0/x86_64" - CUDA_PATH_WINDOWS: "C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA" - MKL_DIR: "$(Build.SourcesDirectory)/mkl" - MKL_URL: "https://romang.blob.core.windows.net/mariandev/ci/mkl-2020.1-windows-static.zip" - VCPKG_COMMIT: c69096659f49e2b1aca532ea5c2f8c135182519b - VCPKG_DIR: "$(Build.SourcesDirectory)/vcpkg" - VCPKG_PACKAGES: "protobuf" + - group: marian-prod-tests + - name: BOOST_ROOT_WINDOWS + value: "C:/hostedtoolcache/windows/Boost/1.72.0/x86_64" + - name: BOOST_URL + value: "https://sourceforge.net/projects/boost/files/boost-binaries/1.72.0/boost_1_72_0-msvc-14.2-64.exe" + - name: CUDA_PATH_WINDOWS + value: "C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA" + - name: MKL_DIR + value: "$(Build.SourcesDirectory)/mkl" + - name: MKL_URL + value: "https://romang.blob.core.windows.net/mariandev/ci/mkl-2020.1-windows-static.zip" + - name: VCPKG_COMMIT + value: c69096659f49e2b1aca532ea5c2f8c135182519b + - name: VCPKG_DIR + value: "$(Build.SourcesDirectory)/vcpkg" + - name: VCPKG_PACKAGES + value: "protobuf" # The Visual Studio installation directory can be found using: # pushd "C:\Program Files 
(x86)\Microsoft Visual Studio\Installer\" # for /f "delims=" %%x in ('.\vswhere.exe -latest -property InstallationPath') do set VSPATH=%%x # popd - VS_PATH: "C:/Program Files (x86)/Microsoft Visual Studio/2019/Enterprise" + - name: VS_PATH + value: "C:/Program Files (x86)/Microsoft Visual Studio/2019/Enterprise" stages: -- stage: Build +- stage: Builds jobs: ###################################################################### - - job: Windows + - job: BuildWindows + displayName: Windows strategy: matrix: @@ -62,21 +74,31 @@ stages: Expand-Archive -Force mkl.zip $(MKL_DIR) displayName: Download MKL - ## Cache for vcpkg packages. It does not work yet properly due to linker errors after restoring it. + ## Cache for Boost #- task: Cache@2 - # displayName: Cache + # displayName: Cache Boost # inputs: # # Change the first value (v0) to another value to clear the cache - # key: 'v0 | "$(VCPKG_PACKAGES)" | vcpkg | "$(Agent.OS)"' - # path: $(VCPKG_DIR) + # key: '"v0" | "boost" | "$(BOOST_URL)" | "$(BOOST_ROOT_WINDOWS)" | "$(Agent.OS)"' + # path: $(BOOST_ROOT_WINDOWS) + # cacheHitVar: CACHE_BOOST_RESTORED # Boost is no longer pre-installed on Azure/GitHub-hosted Windows runners - pwsh: | Write-Host "Downloading Boost to $(BOOST_ROOT_WINDOWS)" - $Url = "https://sourceforge.net/projects/boost/files/boost-binaries/1.72.0/boost_1_72_0-msvc-14.2-64.exe" - C:\msys64\usr\bin\wget.exe -nv $Url -O "$(Pipeline.Workspace)/boost.exe" + C:\msys64\usr\bin\wget.exe -nv "$(BOOST_URL)" -O "$(Pipeline.Workspace)/boost.exe" Start-Process -Wait -FilePath "$(Pipeline.Workspace)/boost.exe" "/SILENT","/SP-","/SUPPRESSMSGBOXES","/DIR=$(BOOST_ROOT_WINDOWS)" displayName: Download Boost + condition: ne(variables.CACHE_BOOST_RESTORED, 'true') + + ## Cache for vcpkg packages. It does not work yet properly due to linker errors after restoring it. 
+ #- task: Cache@2 + # displayName: Cache vcpkg + # inputs: + # # Change the first value (v0) to another value to clear the cache + # key: '"v0" | "vcpkg" | "$(VCPKG_COMMIT)" | "$(VCPKG_PACKAGES)" | "$(Agent.OS)"' + # path: $(VCPKG_DIR) + # cacheHitVar: CACHE_VCPKG_RESTORED - pwsh: | git clone https://github.com/Microsoft/vcpkg.git $(VCPKG_DIR) @@ -91,6 +113,7 @@ stages: Remove-Item $(VCPKG_DIR)\downloads -Force -Recurse -ErrorAction SilentlyContinue Remove-Item $(VCPKG_DIR)\buildtrees -Force -Recurse -ErrorAction SilentlyContinue displayName: Prepare vcpkg + condition: ne(variables.CACHE_VCPKG_RESTORED, 'true') - script: | :: Load VS environment @@ -402,12 +425,12 @@ stages: - checkout: self submodules: true - # The following packages are already installed on Azure-hosted runners: build-essential openssl libssl-dev - # No need to install libprotobuf{17,10,9v5} on Ubuntu {20,18,16}.04 because it is installed together with libprotobuf-dev + # The following packages are already installed on Azure-hosted runners: build-essential openssl libssl-dev + # No need to install libprotobuf{17,10,9v5} on Ubuntu {20,18,16}.04 because it is installed together with libprotobuf-dev - bash: sudo apt-get install -y libgoogle-perftools-dev libprotobuf-dev protobuf-compiler displayName: Install packages - # https://software.intel.com/content/www/us/en/develop/articles/installing-intel-free-libs-and-python-apt-repo.html + # https://software.intel.com/content/www/us/en/develop/articles/installing-intel-free-libs-and-python-apt-repo.html - bash: | wget -qO- "https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS-2019.PUB" | sudo apt-key add - sudo sh -c "echo deb https://apt.repos.intel.com/mkl all main > /etc/apt/sources.list.d/intel-mkl.list" @@ -444,3 +467,230 @@ stages: ls -lah * displayName: Check targets workingDirectory: install + + +# Marian is built in the same job where the regression tests are run to make sure that executables +# is compiled and run on a 
machine with the same CPU architecture, which is required for +# compilations with FBGEMM. +- stage: Tests + jobs: + + ###################################################################### + - job: TestWindows + displayName: Windows CPU+FBGEMM + + pool: + vmImage: windows-latest + + steps: + # Due to multiple checkouts this will be commonly cloned into D:\a\1\s\marian-dev + - checkout: self + submodules: true + + - pwsh: | + C:\msys64\usr\bin\wget.exe -nv $(MKL_URL) -O mkl.zip + Expand-Archive -Force mkl.zip $(MKL_DIR) + displayName: Download MKL + + # Cache for vcpkg packages + - task: Cache@2 + displayName: Cache vcpkg + inputs: + # Change the first value (v0) to another value to clear the cache + key: '"v0" | "vcpkg" | "$(VCPKG_COMMIT)" | "$(VCPKG_PACKAGES)" | "$(Agent.OS)"' + path: $(VCPKG_DIR) + cacheHitVar: CACHE_VCPKG_RESTORED + + - pwsh: | + git clone https://github.com/Microsoft/vcpkg.git $(VCPKG_DIR) + cd $(VCPKG_DIR) + git checkout $(VCPKG_COMMIT) + pushd + .\bootstrap-vcpkg.bat -disableMetrics + popd + # Install packages + .\vcpkg.exe install --triplet x64-windows-static $(VCPKG_PACKAGES) + # Clean to make the cache smaller + Remove-Item $(VCPKG_DIR)\downloads -Force -Recurse -ErrorAction SilentlyContinue + Remove-Item $(VCPKG_DIR)\buildtrees -Force -Recurse -ErrorAction SilentlyContinue + displayName: Prepare vcpkg + condition: ne(variables.CACHE_VCPKG_RESTORED, 'true') + + - script: | + :: Load VS environment + call "$(VS_PATH)/VC/Auxiliary/Build/vcvarsall.bat" x64 + :: Create build directory + mkdir build + cd build + :: Run CMake + cmake .. 
-G Ninja ^ + -DCMAKE_BUILD_TYPE="Slim" ^ + -DCMAKE_C_COMPILER="cl.exe" ^ + -DCMAKE_CXX_COMPILER="cl.exe" ^ + -DCMAKE_MAKE_PROGRAM="ninja.exe" ^ + -DCMAKE_TOOLCHAIN_FILE="$(VCPKG_DIR)\scripts\buildsystems\vcpkg.cmake" ^ + -DVCPKG_TARGET_TRIPLET="x64-windows-static" ^ + ^ + -DCOMPILE_CPU="TRUE" ^ + -DCOMPILE_CUDA="FALSE" ^ + ^ + -DUSE_FBGEMM="TRUE" ^ + -DUSE_SENTENCEPIECE="TRUE" ^ + -DUSE_STATIC_LIBS="TRUE" + displayName: Configure CMake + env: + # Set envvars so that CMake can find the installed packages + MKLROOT: $(MKL_DIR) + workingDirectory: marian-dev + + - script: | + pwd + call "$(VS_PATH)/VC/Auxiliary/Build/vcvarsall.bat" x64 + ninja + displayName: Compile + workingDirectory: marian-dev/build + + ## Publish an artifact with Marian executables. Disabled because it is not portable due to + ## fbgemm and not needed at the moment + #- pwsh: Compress-Archive -Path marian*.exe spm_*.exe -DestinationPath $(Build.SourcesDirectory)/marian-dev-ci_windows-x64_cpu.zip + #displayName: Prepare binaries + #workingDirectory: marian-dev/build + #- publish: marian-dev-ci_windows-x64_cpu.zip + #artifact: marian-dev-ci_windows-x64_cpu + #displayName: Publish binaries + + # Running regression tests + # Due to multiple checkouts this will be commonly cloned into D:\a\1\s\marian-prod-tests + - checkout: git://Marian/marian-prod-tests + + # Collect details about the CPU architecture, etc. + # Because the outputs goes to *.log files, they will be also included in the artifact with test outputs. 
+ - script: bash -c "cat /proc/cpuinfo | tee cpuinfo.log" + displayName: Machine statistics + workingDirectory: marian-prod-tests + + - bash: | + cd models + bash download-models.sh + ls + displayName: Prepare tests + env: + AWS_SECRET_SAS_TOKEN: $(blob-sas-token) + workingDirectory: marian-prod-tests + + # Avoid using $(Build.SourcesDirectory) in bash tasks because on Windows pools it uses '\' + # instead of '/', which often breaks the job + - bash: MARIAN=../marian-dev/build bash ./run_mrt.sh '#cpu' '#basics' + continueOnError: true + displayName: Run tests + workingDirectory: marian-prod-tests + + - bash: | + # cut -c3- removes './' from paths making 7z to retain the directory structure + find . -type f \( -name "*.log" -o -name "*.out" -o -name "*.diff" \) -print | cut -c3- > listing.txt + echo "Creating an artifact with the following files:" + cat listing.txt + 7z a -tzip ../regression-tests-ci_windows-x64_cpu.zip @listing.txt + displayName: Collect outputs + workingDirectory: marian-prod-tests + + - publish: regression-tests-ci_windows-x64_cpu.zip + artifact: regression-tests-ci_windows-x64_cpu + displayName: Publish outputs + + ###################################################################### + - job: TestLinux + displayName: Linux CPU+FBGEMM + + pool: + vmImage: ubuntu-latest + + steps: + # Due to multiple checkouts this will be commonly cloned into D:\a\1\s\marian-dev + - checkout: self + submodules: true + + # The following packages are already installed on Azure-hosted runners: build-essential openssl libssl-dev + # No need to install libprotobuf{17,10,9v5} on Ubuntu {20,18,16}.04 because it is installed together with libprotobuf-dev + - bash: sudo apt-get install -y libgoogle-perftools-dev libprotobuf-dev protobuf-compiler gcc-8 g++-8 + displayName: Install packages + + # https://software.intel.com/content/www/us/en/develop/articles/installing-intel-free-libs-and-python-apt-repo.html + - bash: | + wget -qO- 
"https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS-2019.PUB" | sudo apt-key add - + sudo sh -c "echo deb https://apt.repos.intel.com/mkl all main > /etc/apt/sources.list.d/intel-mkl.list" + sudo apt-get update -o Dir::Etc::sourcelist="/etc/apt/sources.list.d/intel-mkl.list" + sudo apt-get install -y --no-install-recommends intel-mkl-64bit-2020.0-088 + displayName: Install MKL + + # Note that COMPILE_CPU=on and USE_SENTENCEPUECE=on are set explicitly to make them detectable + # by the regression tests framework (not sure if it is still required) + - bash: | + mkdir -p install + mkdir -p build + cd build + CC=/usr/bin/gcc-8 CXX=/usr/bin/g++-8 \ + cmake .. \ + -DCMAKE_BUILD_TYPE=slim \ + -DCOMPILE_CPU=on \ + -DCOMPILE_CUDA=off \ + -DUSE_FBGEMM=on \ + -DUSE_SENTENCEPIECE=on \ + -DUSE_STATIC_LIBS=on + displayName: Configure CMake + workingDirectory: marian-dev + + - bash: make -j3 + displayName: Compile + workingDirectory: marian-dev/build + + ## Publish an artifact with Marian executables. Disabled because it is not portable due to + ## fbgemm and not needed at the moment + #- bash: zip $(Build.SourcesDirectory)/marian-dev-ci_linux-x64-static_cpu.zip marian* spm_* + #displayName: Prepare binaries + #workingDirectory: marian-dev/build + #- publish: marian-dev-ci_linux-x64-static_cpu.zip + #artifact: marian-dev-ci_linux-x64-static_cpu + #displayName: Publish binaries + + # Running regression tests + # Due to multiple checkouts this will be commonly cloned into D:\a\1\s\marian-prod-tests + - checkout: git://Marian/marian-prod-tests + + # Collect details about the CPU architecture, etc. + # Because the outputs goes to *.log files, they will be also included in the artifact with test outputs. 
+ - bash: | + echo ">>> lscpu" + lscpu | tee lscpu.log + echo ">>> cpuinfo" + cat /proc/cpuinfo | tee cpuinfo.log + /usr/bin/gcc-8 --version | tee gcc.log + displayName: Machine statistics + workingDirectory: marian-prod-tests + + - bash: | + cd models + bash download-models.sh + ls + displayName: Prepare tests + env: + AWS_SECRET_SAS_TOKEN: $(blob-sas-token) + workingDirectory: marian-prod-tests + + - bash: MARIAN=../marian-dev/build bash ./run_mrt.sh '#cpu' '#basics' + continueOnError: true + displayName: Run tests + workingDirectory: marian-prod-tests + + - bash: | + # cut -c3- removes './' from paths making 7z to retain the directory structure + find . -type f \( -name "*.log" -o -name "*.out" -o -name "*.diff" \) -print | cut -c3- > listing.txt + echo "Creating an artifact with the following files:" + cat listing.txt + 7z a -tzip ../regression-tests-ci_linux-x64-static_cpu.zip @listing.txt + displayName: Collect outputs + workingDirectory: marian-prod-tests + + - publish: regression-tests-ci_linux-x64-static_cpu.zip + artifact: regression-tests-ci_linux-x64-static_cpu + displayName: Publish outputs diff --git a/src/common/config_parser.cpp b/src/common/config_parser.cpp index 870bf52d..d7818afb 100644 --- a/src/common/config_parser.cpp +++ b/src/common/config_parser.cpp @@ -983,15 +983,15 @@ Ptr<Options> ConfigParser::parseOptions(int argc, char** argv, bool doValidate) auto buildInfo = get<std::string>("build-info"); if(!buildInfo.empty() && buildInfo != "false") { -#ifndef _MSC_VER // cmake build options are not available on MSVC based build. +#ifdef BUILD_INFO_AVAILABLE // cmake build options are not available on MSVC based build. 
if(buildInfo == "all") std::cerr << cmakeBuildOptionsAdvanced() << std::endl; else std::cerr << cmakeBuildOptions() << std::endl; exit(0); -#else // _MSC_VER - ABORT("build-info is not available on MSVC based build."); -#endif // _MSC_VER +#else // BUILD_INFO_AVAILABLE + ABORT("build-info is not available on MSVC based build unless compiled via CMake."); +#endif // BUILD_INFO_AVAILABLE } // get paths to extra config files |