diff options
author | Roman Grundkiewicz <rgrundkiewicz@gmail.com> | 2022-02-25 21:24:42 +0300 |
---|---|---|
committer | Roman Grundkiewicz <rgrundkiewicz@gmail.com> | 2022-02-25 21:24:42 +0300 |
commit | c0497dd22a60b64d141a28b92279b52a814f8b85 (patch) | |
tree | 4df5b89615a972c3372c642116f7deb9e61fc67a | |
parent | d1b224ef2c06f8a433354bfd0ea523bb84a30995 (diff) |
Unify run_mrt.sh scripts
-rw-r--r-- | README.md | 3 | ||||
-rwxr-xr-x | run_mrt.sh | 166 |
2 files changed, 118 insertions, 51 deletions
@@ -7,7 +7,8 @@ pure C++ with minimal dependencies. This repository contains the regression test framework for the main development repository: https://github.com/marian-nmt/marian-dev. -Tests have been developed for Linux for Marian compiled using GCC 7+. +Tests have been developed for Linux for Marian compiled using GCC 8+ and Nvidia +Maxwell/Pascal GPUs. ## Structure @@ -20,20 +20,47 @@ SHELL=/bin/bash export LC_ALL=C.UTF-8 +export MRT_ROOT="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" +export MRT_TOOLS="$MRT_ROOT/tools" +export MRT_MARIAN="$( realpath "${MARIAN:-$MRT_ROOT/../build}" )" + +RUN_LOGS="$MRT_ROOT/previous.log.tmp" # Logging file for log and logn commands +rm -f $RUN_LOGS + +# Needed so that previous.log is not overwritten when it is provided as an argument +function cleanup { + test -s "$RUN_LOGS" && mv "$RUN_LOGS" "$MRT_ROOT/previous.log" +} +trap cleanup EXIT function log { - echo [$(date "+%m/%d/%Y %T")] $@ + echo "[$(date '+%m/%d/%Y %T')] $@" | tee -a $RUN_LOGS } function logn { - echo -n [$(date "+%m/%d/%Y %T")] $@ + echo -n "[$(date '+%m/%d/%Y %T')] $@" | tee -a $RUN_LOGS +} + +function loge { + echo $@ | tee -a $RUN_LOGS } log "Running on $(hostname) as process $$" -export MRT_ROOT="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" -export MRT_TOOLS=$MRT_ROOT/tools -export MRT_MARIAN="$( realpath ${MARIAN:-$MRT_ROOT/../build} )" +# On Windows, the .exe suffix should be added to executables +UNAME=$(uname) +if [ "$UNAME" == "Linux" ]; then + log "Running on Linux machine" + export MRT_BIN= + export MRT_OS=linux +elif [[ "$UNAME" == CYGWIN* || "$UNAME" == MINGW* ]]; then + log "Running on Windows machine" + export MRT_BIN=.exe + export MRT_OS=windows +else + log "Unsupported or unrecognized machine with uname= $UNAME" + exit 1 +fi # Print folders which contain models and data for regression tests export MRT_MODELS="$( realpath ${MODELS:-$MRT_ROOT/models} )" @@ -43,41 +70,51 @@ log "Using models from: $MRT_MODELS" log "Using data from: $MRT_DATA" # Try adding build/ to MARIAN for backward compatibility -if [[ ! -e $MRT_MARIAN/marian-decoder ]]; then +if [[ ! -e "$MRT_MARIAN/marian-decoder$MRT_BIN" ]]; then MRT_MARIAN="$MRT_MARIAN/build" fi # Check if required tools are present in marian directory for cmd in marian marian-decoder marian-scorer marian-vocab; do - if [ ! -e $MRT_MARIAN/$cmd ]; then - echo "Error: '$MRT_MARIAN/$cmd' not found. Do you need to compile the toolkit first?" + if [ ! -e "$MRT_MARIAN/$cmd$MRT_BIN" ]; then + loge "Error: '$MRT_MARIAN/$cmd$MRT_BIN' not found. Do you need to compile the toolkit first?" exit 1 fi done -log "Using Marian binary: $MRT_MARIAN/marian" +# Common Marian executables +export MRT_MARIAN_TRAINER="$MRT_MARIAN/marian$MRT_BIN" +export MRT_MARIAN_DECODER="$MRT_MARIAN/marian-decoder$MRT_BIN" +export MRT_MARIAN_SCORER="$MRT_MARIAN/marian-scorer$MRT_BIN" +export MRT_MARIAN_VOCAB="$MRT_MARIAN/marian-vocab$MRT_BIN" + +log "Using Marian binary: $MRT_MARIAN_DECODER" # Log Marian version -export MRT_MARIAN_VERSION=$($MRT_MARIAN/marian --version 2>&1) +export MRT_MARIAN_VERSION=$($MRT_MARIAN_TRAINER --version 2>&1) log "Version: $MRT_MARIAN_VERSION" # Get CMake settings from the --build-info option -if ! grep -q "build-info" < <( $MRT_MARIAN/marian --help ); then - echo "Error: Marian is too old as it does not have the required --build-info option" +if ! grep -q "build-info" < <( $MRT_MARIAN_TRAINER --help ); then + loge "Error: Marian does not have the required --build-info option. Use newer version of Marian" exit 1 fi -$MRT_MARIAN/marian --build-info all 2> $MRT_ROOT/cmake.log +$MRT_MARIAN_TRAINER --build-info all 2> "$MRT_ROOT/cmake.log" + +if test ! -s "$MRT_ROOT/cmake.log" || grep -q "Error: build-info is not available" "$MRT_ROOT/cmake.log"; then + loge "Warning: Marian does not set the required --build-info option. Tests may not work properly" +fi # Check Marian compilation settings -export MRT_MARIAN_BUILD_TYPE=$(cat $MRT_ROOT/cmake.log | grep "CMAKE_BUILD_TYPE=" | cut -f2 -d=) -export MRT_MARIAN_COMPILER=$(cat $MRT_ROOT/cmake.log | grep "CMAKE_CXX_COMPILER=" | cut -f2 -d=) -export MRT_MARIAN_USE_MKL=$(cat $MRT_ROOT/cmake.log | egrep "COMPILE_CPU=(ON|on|1)") -export MRT_MARIAN_USE_CUDA=$(cat $MRT_ROOT/cmake.log | egrep "COMPILE_CUDA=(ON|on|1)") -export MRT_MARIAN_USE_CUDNN=$(cat $MRT_ROOT/cmake.log | egrep "USE_CUDNN=(ON|on|1)") -export MRT_MARIAN_USE_SENTENCEPIECE=$(cat $MRT_ROOT/cmake.log | egrep "USE_SENTENCEPIECE=(ON|on|1)") -export MRT_MARIAN_USE_FBGEMM=$(cat $MRT_ROOT/cmake.log | egrep "USE_FBGEMM=(ON|on|1)") -export MRT_MARIAN_USE_UNITTESTS=$(cat $MRT_ROOT/cmake.log | egrep "COMPILE_TESTS=(ON|on|1)") +export MRT_MARIAN_BUILD_TYPE=$(cat $MRT_ROOT/cmake.log | grep -i "CMAKE_BUILD_TYPE=" | cut -f2 -d=) +export MRT_MARIAN_COMPILER=$(cat $MRT_ROOT/cmake.log | grep -i "CMAKE_CXX_COMPILER=" | cut -f2 -d=) +export MRT_MARIAN_USE_MKL=$(cat $MRT_ROOT/cmake.log | egrep -i "COMPILE_CPU=(true|on|1)" | cat) +export MRT_MARIAN_USE_CUDA=$(cat $MRT_ROOT/cmake.log | egrep -i "COMPILE_CUDA=(true|on|1)" | cat) +export MRT_MARIAN_USE_CUDNN=$(cat $MRT_ROOT/cmake.log | egrep -i "USE_CUDNN=(true|on|1)" | cat) +export MRT_MARIAN_USE_SENTENCEPIECE=$(cat $MRT_ROOT/cmake.log | egrep -i "USE_SENTENCEPIECE=(true|on|1)" | cat) +export MRT_MARIAN_USE_FBGEMM=$(cat $MRT_ROOT/cmake.log | egrep -i "USE_FBGEMM=(true|on|1)" | cat) +export MRT_MARIAN_USE_UNITTESTS=$(cat $MRT_ROOT/cmake.log | egrep -i "COMPILE_TESTS=(true|on|1)" | cat) log "Build type: $MRT_MARIAN_BUILD_TYPE" log "Using compiler: $MRT_MARIAN_COMPILER" @@ -87,13 +124,28 @@ log "Using SentencePiece: $MRT_MARIAN_USE_SENTENCEPIECE" log "Using FBGEMM: $MRT_MARIAN_USE_FBGEMM" log "Unit tests: $MRT_MARIAN_USE_UNITTESTS" + # Number of available devices -cuda_num_devices=$(($(echo $CUDA_VISIBLE_DEVICES | grep -c ',')+1)) +cuda_num_devices=$(($(echo $CUDA_VISIBLE_DEVICES | grep -c ',' | cat)+1)) export MRT_NUM_DEVICES=${NUM_DEVICES:-$cuda_num_devices} log "Using CUDA visible devices: $CUDA_VISIBLE_DEVICES" log "Using number of GPU devices: $MRT_NUM_DEVICES" + +# CPU architecture details +test -e "$MRT_ROOT/cpuinfo.log" || cat /proc/cpuinfo > "$MRT_ROOT/cpuinfo.log" +grep -qi "avx2" "$MRT_ROOT/cpuinfo.log" && MRT_CPU_AVX2=true +grep -qi "avx512" "$MRT_ROOT/cpuinfo.log" && MRT_CPU_AVX512=true +grep -qi "avx512_vnni" "$MRT_ROOT/cpuinfo.log" && MRT_CPU_AVX512VNNI=true +export MRT_CPU_AVX2 +export MRT_CPU_AVX512 +export MRT_CPU_AVX512VNNI + +log "CPU intrinsics: avx2=$MRT_CPU_AVX2 avx512=$MRT_CPU_AVX512 avx512vnni=$MRT_CPU_AVX512VNNI" + + +# Time out export MRT_TIMEOUT=${TIMEOUT:-5m} # the default time out is 5 minutes, see `man timeout` cmd_timeout="" if [ $MRT_TIMEOUT != "0" ]; then @@ -105,14 +157,19 @@ log "Using time out: $MRT_TIMEOUT" # Exit codes export EXIT_CODE_SUCCESS=0 export EXIT_CODE_SKIP=100 +export EXIT_CODE_SKIP_MISSING_FILE=101 +export EXIT_CODE_SKIP_NO_FBGEMM=105 +export EXIT_CODE_SKIP_NO_SENTENCEPIECE=106 +export EXIT_CODE_SKIP_NO_AVX2=110 +export EXIT_CODE_SKIP_NO_AVX512=111 export EXIT_CODE_TIMEOUT=124 # Exit code returned by the timeout command if timed out function format_time { - dt=$(echo "$2 - $1" | bc 2>/dev/null) - dh=$(echo "$dt/3600" | bc 2>/dev/null) - dt2=$(echo "$dt-3600*$dh" | bc 2>/dev/null) - dm=$(echo "$dt2/60" | bc 2>/dev/null) - ds=$(echo "$dt2-60*$dm" | bc 2>/dev/null) + dt=$(python -c "print($2 - $1)" 2>/dev/null) + dh=$(python -c "print(int($dt/3600))" 2>/dev/null) + dt2=$(python -c "print($dt-3600*$dh)" 2>/dev/null) + dm=$(python -c "print(int($dt2/60))" 2>/dev/null) + ds=$(python -c "print($dt2-60*$dm)" 2>/dev/null) LANG=C printf "%02d:%02d:%02.3fs" $dh $dm $ds } @@ -126,7 +183,7 @@ if [ $# -ge 1 ]; then # A log file with paths to test files if [[ "$arg" = *.log ]]; then # Extract tests from .log file - args=$(cat $arg | grep '/test_.*\.sh' | grep -v '/_' | sed 's/^ *- *//' | tr '\n' ' ' | sed 's/ *$//') + args=$(cat $arg | grep -vP '^\[' | grep '/test_.*\.sh' | grep -v '/_' | sed 's/^ *- *//' | tr '\n' ' ' | sed 's/ *$//') test_prefixes="$test_prefixes $args" # A hash tag elif [[ "$arg" = '#'* ]]; then @@ -141,8 +198,14 @@ if [ $# -ge 1 ]; then done fi +# Check if the variable is empty or contains only spaces +if [[ -z "${test_prefixes// }" ]]; then + log "Error: no tests found in the specified input(s): $@" + exit 1 +fi + # Extract all subdirectories, which will be traversed to look for regression tests -test_dirs=$(find $test_prefixes -type d | grep -v "/_") +test_dirs=$(find $test_prefixes -type d | grep -v "/_" | cat) if grep -q "/test_.*\.sh" <<< "$test_prefixes"; then test_files=$(printf '%s\n' $test_prefixes | sed 's!*/!!') @@ -208,7 +271,7 @@ do if [ "$nosetup" = true ]; then ((++count_skipped)) tests_skipped+=($test_path) - echo " skipped" + loge " skipped" cd $MRT_ROOT continue; fi @@ -221,11 +284,11 @@ do # Check exit code if [ $exit_code -eq $EXIT_CODE_SUCCESS ]; then ((++count_passed)) - echo " OK" + loge " OK" elif [ $exit_code -eq $EXIT_CODE_SKIP ]; then ((++count_skipped)) tests_skipped+=($test_path) - echo " skipped" + loge " skipped" elif [ $exit_code -eq $EXIT_CODE_TIMEOUT ]; then ((++count_timedout)) tests_timedout+=($test_path) @@ -233,12 +296,12 @@ do echo "The test timed out after $TIMEOUT" >> $test_file.log # A timed out test is a failed test ((++count_failed)) - echo " timed out" + loge " timed out" success=false else ((++count_failed)) tests_failed+=($test_path) - echo " failed" + loge " failed" success=false fi @@ -270,39 +333,42 @@ done time_end=$(date +%s.%N) time_total=$(format_time $time_start $time_end) -prev_log=previous.log -rm -f $prev_log - ############################################################################### # Print skipped and failed tests if [ -n "$tests_skipped" ] || [ -n "$tests_failed" ] || [ -n "$tests_timedout" ]; then - echo "---------------------" + loge "---------------------" fi -[[ -z "$tests_skipped" ]] || echo "Skipped:" | tee -a $prev_log +[[ -z "$tests_skipped" ]] || loge "Skipped:" for test_name in "${tests_skipped[@]}"; do - echo " - $test_name" | tee -a $prev_log + loge "- $test_name" done -[[ -z "$tests_failed" ]] || echo "Failed:" | tee -a $prev_log +[[ -z "$tests_failed" ]] || loge "Failed:" for test_name in "${tests_failed[@]}"; do - echo " - $test_name" | tee -a $prev_log + loge "- $test_name" done -[[ -z "$tests_timedout" ]] || echo "Timed out:" | tee -a $prev_log +[[ -z "$tests_timedout" ]] || loge "Timed out:" for test_name in "${tests_timedout[@]}"; do - echo " - $test_name" | tee -a $prev_log + loge "- $test_name" done [[ -z "$tests_failed" ]] || echo "Logs:" for test_name in "${tests_failed[@]}"; do - echo " - $(realpath $test_name | sed 's/\.sh/.sh.log/')" + echo "- $(realpath $test_name | sed 's/\.sh/.sh.log/')" done ############################################################################### # Print summary -echo "---------------------" | tee -a $prev_log -echo -n "Ran $count_all tests in $time_total, $count_passed passed, $count_skipped skipped, $count_failed failed" | tee -a $prev_log -[ -n "$tests_timedout" ] && (echo -n " (incl. $count_timedout timed out)" | tee -a $prev_log) -echo "" | tee -a $prev_log +loge "---------------------" +loge -n "Ran $count_all tests in $time_total, $count_passed passed, $count_skipped skipped, $count_failed failed" +[ -n "$tests_timedout" ] && loge -n " (incl. $count_timedout timed out)" +loge "" # Return exit code -$success && [ $count_all -gt 0 ] +if $success && [ $count_all -gt 0 ]; then + loge "OK" + exit 0 +else + loge "FAILED" + exit 1 +fi |