Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/marian-nmt/marian.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Roman Grundkiewicz <rogrundk@microsoft.com> 2021-08-06 11:02:18 +0300
committer Roman Grundkiewicz <rogrundk@microsoft.com> 2021-08-06 11:02:18 +0300
commit e025bfb07c471c7505d0cc38e718f0f708783539 (patch)
tree 56b6cc25c906522177795300fcc1d4b7745dc3c8
parent d124ca9f5b14458fdd110a96aee06e42178f4b3e (diff)
Merged PR 20070: Run regression tests in Azure Pipelines
The changes proposed in this pull request:
* Added regression testing with internal models into Azure Pipelines on both Windows and Ubuntu
* Created https://machinetranslation.visualstudio.com/Marian/_git/marian-prod-tests (more tests will be added over time)
* Made regression test outputs (all `.log`, `.out`, `.diff` files) available for inspection as a downloadable artifact.
* Made `--build-info` option available in CMake-based Windows builds

Warning: I tried to handle multiple cases, but some regression tests may occasionally fail, especially tests using avx2 or avx512 models, because the outputs are system/CPU dependent. I think it's better to merge this already, monitoring the stability of tests, and adding expected output variations if necessary, improving the coverage and stability of regression tests over time.
-rw-r--r-- CMakeLists.txt 3
-rw-r--r-- azure-pipelines.yml 288
-rw-r--r-- src/common/config_parser.cpp 8
3 files changed, 276 insertions, 23 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 870fb70b..4e6f24c7 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -624,6 +624,9 @@ execute_process(COMMAND rm ${CMAKE_CURRENT_SOURCE_DIR}/src/common/build_info.cpp
OUTPUT_QUIET ERROR_QUIET)
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/src/common/build_info.cpp.in
${CMAKE_CURRENT_BINARY_DIR}/src/common/build_info.cpp @ONLY)
+# Define BUILD_INFO_AVAILABLE so that the code can check whether this is a CMake-based
+# compilation, which always adds the build-info option, even on Windows.
+add_definitions(-DBUILD_INFO_AVAILABLE=1)
# Compile source files
include_directories(${marian_SOURCE_DIR}/src)
diff --git a/azure-pipelines.yml b/azure-pipelines.yml
index a1198818..4f7ce02d 100644
--- a/azure-pipelines.yml
+++ b/azure-pipelines.yml
@@ -15,25 +15,37 @@ pool:
name: Azure Pipelines
variables:
- BOOST_ROOT_WINDOWS: "C:/hostedtoolcache/windows/Boost/1.72.0/x86_64"
- CUDA_PATH_WINDOWS: "C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA"
- MKL_DIR: "$(Build.SourcesDirectory)/mkl"
- MKL_URL: "https://romang.blob.core.windows.net/mariandev/ci/mkl-2020.1-windows-static.zip"
- VCPKG_COMMIT: c69096659f49e2b1aca532ea5c2f8c135182519b
- VCPKG_DIR: "$(Build.SourcesDirectory)/vcpkg"
- VCPKG_PACKAGES: "protobuf"
+ - group: marian-prod-tests
+ - name: BOOST_ROOT_WINDOWS
+ value: "C:/hostedtoolcache/windows/Boost/1.72.0/x86_64"
+ - name: BOOST_URL
+ value: "https://sourceforge.net/projects/boost/files/boost-binaries/1.72.0/boost_1_72_0-msvc-14.2-64.exe"
+ - name: CUDA_PATH_WINDOWS
+ value: "C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA"
+ - name: MKL_DIR
+ value: "$(Build.SourcesDirectory)/mkl"
+ - name: MKL_URL
+ value: "https://romang.blob.core.windows.net/mariandev/ci/mkl-2020.1-windows-static.zip"
+ - name: VCPKG_COMMIT
+ value: c69096659f49e2b1aca532ea5c2f8c135182519b
+ - name: VCPKG_DIR
+ value: "$(Build.SourcesDirectory)/vcpkg"
+ - name: VCPKG_PACKAGES
+ value: "protobuf"
# The Visual Studio installation directory can be found using:
# pushd "C:\Program Files (x86)\Microsoft Visual Studio\Installer\"
# for /f "delims=" %%x in ('.\vswhere.exe -latest -property InstallationPath') do set VSPATH=%%x
# popd
- VS_PATH: "C:/Program Files (x86)/Microsoft Visual Studio/2019/Enterprise"
+ - name: VS_PATH
+ value: "C:/Program Files (x86)/Microsoft Visual Studio/2019/Enterprise"
stages:
-- stage: Build
+- stage: Builds
jobs:
######################################################################
- - job: Windows
+ - job: BuildWindows
+ displayName: Windows
strategy:
matrix:
@@ -62,21 +74,31 @@ stages:
Expand-Archive -Force mkl.zip $(MKL_DIR)
displayName: Download MKL
- ## Cache for vcpkg packages. It does not work yet properly due to linker errors after restoring it.
+ ## Cache for Boost
#- task: Cache@2
- # displayName: Cache
+ # displayName: Cache Boost
# inputs:
# # Change the first value (v0) to another value to clear the cache
- # key: 'v0 | "$(VCPKG_PACKAGES)" | vcpkg | "$(Agent.OS)"'
- # path: $(VCPKG_DIR)
+ # key: '"v0" | "boost" | "$(BOOST_URL)" | "$(BOOST_ROOT_WINDOWS)" | "$(Agent.OS)"'
+ # path: $(BOOST_ROOT_WINDOWS)
+ # cacheHitVar: CACHE_BOOST_RESTORED
# Boost is no longer pre-installed on Azure/GitHub-hosted Windows runners
- pwsh: |
Write-Host "Downloading Boost to $(BOOST_ROOT_WINDOWS)"
- $Url = "https://sourceforge.net/projects/boost/files/boost-binaries/1.72.0/boost_1_72_0-msvc-14.2-64.exe"
- C:\msys64\usr\bin\wget.exe -nv $Url -O "$(Pipeline.Workspace)/boost.exe"
+ C:\msys64\usr\bin\wget.exe -nv "$(BOOST_URL)" -O "$(Pipeline.Workspace)/boost.exe"
Start-Process -Wait -FilePath "$(Pipeline.Workspace)/boost.exe" "/SILENT","/SP-","/SUPPRESSMSGBOXES","/DIR=$(BOOST_ROOT_WINDOWS)"
displayName: Download Boost
+ condition: ne(variables.CACHE_BOOST_RESTORED, 'true')
+
+ ## Cache for vcpkg packages. It does not yet work properly due to linker errors after restoring it.
+ #- task: Cache@2
+ # displayName: Cache vcpkg
+ # inputs:
+ # # Change the first value (v0) to another value to clear the cache
+ # key: '"v0" | "vcpkg" | "$(VCPKG_COMMIT)" | "$(VCPKG_PACKAGES)" | "$(Agent.OS)"'
+ # path: $(VCPKG_DIR)
+ # cacheHitVar: CACHE_VCPKG_RESTORED
- pwsh: |
git clone https://github.com/Microsoft/vcpkg.git $(VCPKG_DIR)
@@ -91,6 +113,7 @@ stages:
Remove-Item $(VCPKG_DIR)\downloads -Force -Recurse -ErrorAction SilentlyContinue
Remove-Item $(VCPKG_DIR)\buildtrees -Force -Recurse -ErrorAction SilentlyContinue
displayName: Prepare vcpkg
+ condition: ne(variables.CACHE_VCPKG_RESTORED, 'true')
- script: |
:: Load VS environment
@@ -402,12 +425,12 @@ stages:
- checkout: self
submodules: true
- # The following packages are already installed on Azure-hosted runners: build-essential openssl libssl-dev
- # No need to install libprotobuf{17,10,9v5} on Ubuntu {20,18,16}.04 because it is installed together with libprotobuf-dev
+ # The following packages are already installed on Azure-hosted runners: build-essential openssl libssl-dev
+ # No need to install libprotobuf{17,10,9v5} on Ubuntu {20,18,16}.04 because it is installed together with libprotobuf-dev
- bash: sudo apt-get install -y libgoogle-perftools-dev libprotobuf-dev protobuf-compiler
displayName: Install packages
- # https://software.intel.com/content/www/us/en/develop/articles/installing-intel-free-libs-and-python-apt-repo.html
+ # https://software.intel.com/content/www/us/en/develop/articles/installing-intel-free-libs-and-python-apt-repo.html
- bash: |
wget -qO- "https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS-2019.PUB" | sudo apt-key add -
sudo sh -c "echo deb https://apt.repos.intel.com/mkl all main > /etc/apt/sources.list.d/intel-mkl.list"
@@ -444,3 +467,230 @@ stages:
ls -lah *
displayName: Check targets
workingDirectory: install
+
+
+# Marian is built in the same job in which the regression tests are run to make sure that the
+# executables are compiled and run on a machine with the same CPU architecture, which is required
+# for compilations with FBGEMM.
+- stage: Tests
+ jobs:
+
+ ######################################################################
+ - job: TestWindows
+ displayName: Windows CPU+FBGEMM
+
+ pool:
+ vmImage: windows-latest
+
+ steps:
+ # Due to multiple checkouts this will be commonly cloned into D:\a\1\s\marian-dev
+ - checkout: self
+ submodules: true
+
+ - pwsh: |
+ C:\msys64\usr\bin\wget.exe -nv $(MKL_URL) -O mkl.zip
+ Expand-Archive -Force mkl.zip $(MKL_DIR)
+ displayName: Download MKL
+
+ # Cache for vcpkg packages
+ - task: Cache@2
+ displayName: Cache vcpkg
+ inputs:
+ # Change the first value (v0) to another value to clear the cache
+ key: '"v0" | "vcpkg" | "$(VCPKG_COMMIT)" | "$(VCPKG_PACKAGES)" | "$(Agent.OS)"'
+ path: $(VCPKG_DIR)
+ cacheHitVar: CACHE_VCPKG_RESTORED
+
+ - pwsh: |
+ git clone https://github.com/Microsoft/vcpkg.git $(VCPKG_DIR)
+ cd $(VCPKG_DIR)
+ git checkout $(VCPKG_COMMIT)
+ pushd
+ .\bootstrap-vcpkg.bat -disableMetrics
+ popd
+ # Install packages
+ .\vcpkg.exe install --triplet x64-windows-static $(VCPKG_PACKAGES)
+ # Clean to make the cache smaller
+ Remove-Item $(VCPKG_DIR)\downloads -Force -Recurse -ErrorAction SilentlyContinue
+ Remove-Item $(VCPKG_DIR)\buildtrees -Force -Recurse -ErrorAction SilentlyContinue
+ displayName: Prepare vcpkg
+ condition: ne(variables.CACHE_VCPKG_RESTORED, 'true')
+
+ - script: |
+ :: Load VS environment
+ call "$(VS_PATH)/VC/Auxiliary/Build/vcvarsall.bat" x64
+ :: Create build directory
+ mkdir build
+ cd build
+ :: Run CMake
+ cmake .. -G Ninja ^
+ -DCMAKE_BUILD_TYPE="Slim" ^
+ -DCMAKE_C_COMPILER="cl.exe" ^
+ -DCMAKE_CXX_COMPILER="cl.exe" ^
+ -DCMAKE_MAKE_PROGRAM="ninja.exe" ^
+ -DCMAKE_TOOLCHAIN_FILE="$(VCPKG_DIR)\scripts\buildsystems\vcpkg.cmake" ^
+ -DVCPKG_TARGET_TRIPLET="x64-windows-static" ^
+ ^
+ -DCOMPILE_CPU="TRUE" ^
+ -DCOMPILE_CUDA="FALSE" ^
+ ^
+ -DUSE_FBGEMM="TRUE" ^
+ -DUSE_SENTENCEPIECE="TRUE" ^
+ -DUSE_STATIC_LIBS="TRUE"
+ displayName: Configure CMake
+ env:
+ # Set envvars so that CMake can find the installed packages
+ MKLROOT: $(MKL_DIR)
+ workingDirectory: marian-dev
+
+ - script: |
+ pwd
+ call "$(VS_PATH)/VC/Auxiliary/Build/vcvarsall.bat" x64
+ ninja
+ displayName: Compile
+ workingDirectory: marian-dev/build
+
+ ## Publish an artifact with Marian executables. Disabled because it is not portable due to
+ ## fbgemm and not needed at the moment
+ #- pwsh: Compress-Archive -Path marian*.exe spm_*.exe -DestinationPath $(Build.SourcesDirectory)/marian-dev-ci_windows-x64_cpu.zip
+ #displayName: Prepare binaries
+ #workingDirectory: marian-dev/build
+ #- publish: marian-dev-ci_windows-x64_cpu.zip
+ #artifact: marian-dev-ci_windows-x64_cpu
+ #displayName: Publish binaries
+
+ # Running regression tests
+ # Due to multiple checkouts this will be commonly cloned into D:\a\1\s\marian-prod-tests
+ - checkout: git://Marian/marian-prod-tests
+
+ # Collect details about the CPU architecture, etc.
+ # Because the outputs go to *.log files, they will also be included in the artifact with test outputs.
+ - script: bash -c "cat /proc/cpuinfo | tee cpuinfo.log"
+ displayName: Machine statistics
+ workingDirectory: marian-prod-tests
+
+ - bash: |
+ cd models
+ bash download-models.sh
+ ls
+ displayName: Prepare tests
+ env:
+ AWS_SECRET_SAS_TOKEN: $(blob-sas-token)
+ workingDirectory: marian-prod-tests
+
+ # Avoid using $(Build.SourcesDirectory) in bash tasks because on Windows pools it uses '\'
+ # instead of '/', which often breaks the job
+ - bash: MARIAN=../marian-dev/build bash ./run_mrt.sh '#cpu' '#basics'
+ continueOnError: true
+ displayName: Run tests
+ workingDirectory: marian-prod-tests
+
+ - bash: |
+ # cut -c3- removes './' from paths so that 7z retains the directory structure
+ find . -type f \( -name "*.log" -o -name "*.out" -o -name "*.diff" \) -print | cut -c3- > listing.txt
+ echo "Creating an artifact with the following files:"
+ cat listing.txt
+ 7z a -tzip ../regression-tests-ci_windows-x64_cpu.zip @listing.txt
+ displayName: Collect outputs
+ workingDirectory: marian-prod-tests
+
+ - publish: regression-tests-ci_windows-x64_cpu.zip
+ artifact: regression-tests-ci_windows-x64_cpu
+ displayName: Publish outputs
+
+ ######################################################################
+ - job: TestLinux
+ displayName: Linux CPU+FBGEMM
+
+ pool:
+ vmImage: ubuntu-latest
+
+ steps:
+ # Due to multiple checkouts this will be commonly cloned into the marian-dev subdirectory of the sources directory
+ - checkout: self
+ submodules: true
+
+ # The following packages are already installed on Azure-hosted runners: build-essential openssl libssl-dev
+ # No need to install libprotobuf{17,10,9v5} on Ubuntu {20,18,16}.04 because it is installed together with libprotobuf-dev
+ - bash: sudo apt-get install -y libgoogle-perftools-dev libprotobuf-dev protobuf-compiler gcc-8 g++-8
+ displayName: Install packages
+
+ # https://software.intel.com/content/www/us/en/develop/articles/installing-intel-free-libs-and-python-apt-repo.html
+ - bash: |
+ wget -qO- "https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS-2019.PUB" | sudo apt-key add -
+ sudo sh -c "echo deb https://apt.repos.intel.com/mkl all main > /etc/apt/sources.list.d/intel-mkl.list"
+ sudo apt-get update -o Dir::Etc::sourcelist="/etc/apt/sources.list.d/intel-mkl.list"
+ sudo apt-get install -y --no-install-recommends intel-mkl-64bit-2020.0-088
+ displayName: Install MKL
+
+ # Note that COMPILE_CPU=on and USE_SENTENCEPIECE=on are set explicitly to make them detectable
+ # by the regression tests framework (not sure if it is still required)
+ - bash: |
+ mkdir -p install
+ mkdir -p build
+ cd build
+ CC=/usr/bin/gcc-8 CXX=/usr/bin/g++-8 \
+ cmake .. \
+ -DCMAKE_BUILD_TYPE=slim \
+ -DCOMPILE_CPU=on \
+ -DCOMPILE_CUDA=off \
+ -DUSE_FBGEMM=on \
+ -DUSE_SENTENCEPIECE=on \
+ -DUSE_STATIC_LIBS=on
+ displayName: Configure CMake
+ workingDirectory: marian-dev
+
+ - bash: make -j3
+ displayName: Compile
+ workingDirectory: marian-dev/build
+
+ ## Publish an artifact with Marian executables. Disabled because it is not portable due to
+ ## fbgemm and not needed at the moment
+ #- bash: zip $(Build.SourcesDirectory)/marian-dev-ci_linux-x64-static_cpu.zip marian* spm_*
+ #displayName: Prepare binaries
+ #workingDirectory: marian-dev/build
+ #- publish: marian-dev-ci_linux-x64-static_cpu.zip
+ #artifact: marian-dev-ci_linux-x64-static_cpu
+ #displayName: Publish binaries
+
+ # Running regression tests
+ # Due to multiple checkouts this will be commonly cloned into the marian-prod-tests subdirectory of the sources directory
+ - checkout: git://Marian/marian-prod-tests
+
+ # Collect details about the CPU architecture, etc.
+ # Because the outputs go to *.log files, they will also be included in the artifact with test outputs.
+ - bash: |
+ echo ">>> lscpu"
+ lscpu | tee lscpu.log
+ echo ">>> cpuinfo"
+ cat /proc/cpuinfo | tee cpuinfo.log
+ /usr/bin/gcc-8 --version | tee gcc.log
+ displayName: Machine statistics
+ workingDirectory: marian-prod-tests
+
+ - bash: |
+ cd models
+ bash download-models.sh
+ ls
+ displayName: Prepare tests
+ env:
+ AWS_SECRET_SAS_TOKEN: $(blob-sas-token)
+ workingDirectory: marian-prod-tests
+
+ - bash: MARIAN=../marian-dev/build bash ./run_mrt.sh '#cpu' '#basics'
+ continueOnError: true
+ displayName: Run tests
+ workingDirectory: marian-prod-tests
+
+ - bash: |
+ # cut -c3- removes './' from paths so that 7z retains the directory structure
+ find . -type f \( -name "*.log" -o -name "*.out" -o -name "*.diff" \) -print | cut -c3- > listing.txt
+ echo "Creating an artifact with the following files:"
+ cat listing.txt
+ 7z a -tzip ../regression-tests-ci_linux-x64-static_cpu.zip @listing.txt
+ displayName: Collect outputs
+ workingDirectory: marian-prod-tests
+
+ - publish: regression-tests-ci_linux-x64-static_cpu.zip
+ artifact: regression-tests-ci_linux-x64-static_cpu
+ displayName: Publish outputs
diff --git a/src/common/config_parser.cpp b/src/common/config_parser.cpp
index 870bf52d..d7818afb 100644
--- a/src/common/config_parser.cpp
+++ b/src/common/config_parser.cpp
@@ -983,15 +983,15 @@ Ptr<Options> ConfigParser::parseOptions(int argc, char** argv, bool doValidate)
auto buildInfo = get<std::string>("build-info");
if(!buildInfo.empty() && buildInfo != "false") {
-#ifndef _MSC_VER // cmake build options are not available on MSVC based build.
+#ifdef BUILD_INFO_AVAILABLE // cmake build options are available only in CMake-based builds.
if(buildInfo == "all")
std::cerr << cmakeBuildOptionsAdvanced() << std::endl;
else
std::cerr << cmakeBuildOptions() << std::endl;
exit(0);
-#else // _MSC_VER
- ABORT("build-info is not available on MSVC based build.");
-#endif // _MSC_VER
+#else // BUILD_INFO_AVAILABLE
+ ABORT("build-info is not available on MSVC based build unless compiled via CMake.");
+#endif // BUILD_INFO_AVAILABLE
}
// get paths to extra config files