From 23a14e354c21af641a91055deee97d8c86ca7961 Mon Sep 17 00:00:00 2001
From: edwardgao
Date: Fri, 30 Dec 2011 02:30:19 +0000
Subject: Add support of Cmake

---
 mgizapp/CMakeLists.txt                     |  67 ++++++++++++++++++
 mgizapp/cmake/CheckCXXSourceCompiles.cmake |  60 +++++++++++++++++
 mgizapp/cmake/FindTR1.cmake                |  84 +++++++++++++++++++++++
 mgizapp/cmake/SRLBoost.cmake               |  21 ++++++
 mgizapp/scripts/CMakeLists.txt             |   4 ++
 mgizapp/scripts/force-align-moses-old.sh   |  48 +++++++++++++
 mgizapp/scripts/force-align-moses.sh       |  23 ++++---
 mgizapp/scripts/run.sh                     |  23 +++++++
 mgizapp/src/CMakeLists.txt                 | 105 +++++++++++++++++++++++++++++
 mgizapp/src/mkcls/CMakeLists.txt           |  72 ++++++++++++++++++++
 10 files changed, 496 insertions(+), 11 deletions(-)
 create mode 100644 mgizapp/CMakeLists.txt
 create mode 100644 mgizapp/cmake/CheckCXXSourceCompiles.cmake
 create mode 100644 mgizapp/cmake/FindTR1.cmake
 create mode 100644 mgizapp/cmake/SRLBoost.cmake
 create mode 100644 mgizapp/scripts/CMakeLists.txt
 create mode 100755 mgizapp/scripts/force-align-moses-old.sh
 create mode 100755 mgizapp/scripts/run.sh
 create mode 100644 mgizapp/src/CMakeLists.txt
 create mode 100644 mgizapp/src/mkcls/CMakeLists.txt

(limited to 'mgizapp')

diff --git a/mgizapp/CMakeLists.txt b/mgizapp/CMakeLists.txt
new file mode 100644
index 0000000..6f9955f
--- /dev/null
+++ b/mgizapp/CMakeLists.txt
@@ -0,0 +1,67 @@
+#CMake 2.6+ is recommended for its improved Boost module.
+#CMAKE_MINIMUM_REQUIRED must precede PROJECT() so that policy defaults are set first.
+CMAKE_MINIMUM_REQUIRED(VERSION 2.6.0 FATAL_ERROR)
+
+PROJECT (srlextract)
+
+SET(MGIZA_VERSION_MAJOR "0")
+SET(MGIZA_VERSION_MINOR "6")
+SET(MGIZA_VERSION_PATCH "7")
+
+# Single-integer form of the version: MAJOR*10000 + MINOR*100 + PATCH.
+MATH(EXPR MGIZA_INT_VERSION "(${MGIZA_VERSION_MAJOR} * 10000) + (${MGIZA_VERSION_MINOR} * 100) + (${MGIZA_VERSION_PATCH} * 1)" )
+
+SET(MGIZA_VERSION "${MGIZA_VERSION_MAJOR}.${MGIZA_VERSION_MINOR}.${MGIZA_VERSION_PATCH}")
+
+IF(COMMAND cmake_policy)
+  cmake_policy(SET CMP0003 NEW)
+ENDIF(COMMAND cmake_policy)
+
+# Only override the install prefix when the user did not supply one.
+IF(CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT)
+  SET(CMAKE_INSTALL_PREFIX
+    "inst" CACHE PATH "MGIZA install prefix" FORCE
+    )
+  MESSAGE(STATUS "You have not set the install dir, default to './inst', if
+  you want to set it, use cmake -DCMAKE_INSTALL_PREFIX to do so")
+ENDIF(CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT)
+
+#set various platform specific global options
+IF(WIN32)
+  SET(CMAKE_DEBUG_POSTFIX "d")
+ENDIF(WIN32)
+
+# Probe the compiler for TR1 shared_ptr / unordered_map support.
+INCLUDE(cmake/FindTR1.cmake)
+
+# include specific modules
+SET(CMAKE_MODULE_PATH "${CMAKE_SOURCE_DIR}/cmake")
+
+
+# Find Boost library, specify
+# BOOST_ROOT=/e/programs/boost_1_35_0
+# BOOST_LIBRARYDIR=$BOOST_ROOT/stage/lib
+
+set(Boost_USE_STATIC_LIBS ON)
+set(Boost_USE_MULTITHREADED ON)
+set(Boost_USE_STATIC_RUNTIME OFF)
+
+# NOTE(review): no COMPONENTS are requested, so Boost_LIBRARIES stays empty (headers only) - confirm.
+FIND_PACKAGE( Boost 1.46)
+
+IF(Boost_FOUND)
+  IF (NOT _boost_IN_CACHE)
+    MESSAGE( "Boost found" )
+    message(STATUS "Boost_INCLUDE_DIR : ${Boost_INCLUDE_DIR}")
+  ENDIF (NOT _boost_IN_CACHE)
+  LINK_DIRECTORIES ( ${Boost_LIBRARY_DIRS} )
+  INCLUDE_DIRECTORIES ( ${Boost_INCLUDE_DIRS} )
+ELSE()
+  MESSAGE(FATAL_ERROR "Boost not found, please set the BOOST_ROOT and BOOST_LIBRARYDIR environment variables " )
+ENDIF()
+
+ADD_SUBDIRECTORY (src)
+ADD_SUBDIRECTORY (src/mkcls)
+ADD_SUBDIRECTORY (scripts)
+
+
diff --git a/mgizapp/cmake/CheckCXXSourceCompiles.cmake b/mgizapp/cmake/CheckCXXSourceCompiles.cmake
new file mode 100644
index 0000000..3921c89
--- /dev/null
+++ b/mgizapp/cmake/CheckCXXSourceCompiles.cmake
@@ -0,0 +1,60 @@
+# - Check if the source code provided in the SOURCE argument compiles.
+# CHECK_CXX_SOURCE_COMPILES(SOURCE VAR)
+# - macro which checks if the source code compiles
+#  SOURCE - source code to try to compile
+#  VAR - variable to store whether the source code compiled
+#
+# The following variables may be set before calling this macro to
+# modify the way the check is run:
+#
+#  CMAKE_REQUIRED_FLAGS = string of compile command line flags
+#  CMAKE_REQUIRED_DEFINITIONS = list of macros to define (-DFOO=bar)
+#  CMAKE_REQUIRED_INCLUDES = list of include directories
+#  CMAKE_REQUIRED_LIBRARIES = list of libraries to link
+
+MACRO(CHECK_CXX_SOURCE_COMPILES SOURCE VAR)
+  # Legacy CMake idiom: the body only runs while ${VAR} is not yet defined (e.g. no cached result).
+  IF("${VAR}" MATCHES "^${VAR}$")
+    SET(MACRO_CHECK_FUNCTION_DEFINITIONS
+      "-D${VAR} ${CMAKE_REQUIRED_FLAGS}")
+    IF(CMAKE_REQUIRED_LIBRARIES)
+      SET(CHECK_CXX_SOURCE_COMPILES_ADD_LIBRARIES
+        "-DLINK_LIBRARIES:STRING=${CMAKE_REQUIRED_LIBRARIES}")
+    ELSE(CMAKE_REQUIRED_LIBRARIES)
+      SET(CHECK_CXX_SOURCE_COMPILES_ADD_LIBRARIES)
+    ENDIF(CMAKE_REQUIRED_LIBRARIES)
+    IF(CMAKE_REQUIRED_INCLUDES)
+      SET(CHECK_CXX_SOURCE_COMPILES_ADD_INCLUDES
+        "-DINCLUDE_DIRECTORIES:STRING=${CMAKE_REQUIRED_INCLUDES}")
+    ELSE(CMAKE_REQUIRED_INCLUDES)
+      SET(CHECK_CXX_SOURCE_COMPILES_ADD_INCLUDES)
+    ENDIF(CMAKE_REQUIRED_INCLUDES)
+    FILE(WRITE "${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/CMakeTmp/src.cxx"
+      "${SOURCE}\n")
+
+    MESSAGE(STATUS "Performing Test ${VAR}")
+    TRY_COMPILE(${VAR}
+      ${CMAKE_BINARY_DIR}
+      ${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/CMakeTmp/src.cxx
+      COMPILE_DEFINITIONS ${CMAKE_REQUIRED_DEFINITIONS}
+      CMAKE_FLAGS -DCOMPILE_DEFINITIONS:STRING=${MACRO_CHECK_FUNCTION_DEFINITIONS}
+      "${CHECK_CXX_SOURCE_COMPILES_ADD_LIBRARIES}"
+      "${CHECK_CXX_SOURCE_COMPILES_ADD_INCLUDES}"
+      OUTPUT_VARIABLE OUTPUT)
+    IF(${VAR})
+      SET(${VAR} 1 CACHE INTERNAL "Test ${VAR}")
+      MESSAGE(STATUS "Performing Test ${VAR} - Success")
+      FILE(APPEND ${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/CMakeOutput.log
+        "Performing C++ SOURCE FILE Test ${VAR} succeded with the following output:\n"
+        "${OUTPUT}\n"
+        "Source file was:\n${SOURCE}\n")
+    ELSE(${VAR})
+      MESSAGE(STATUS "Performing Test ${VAR} - Failed")
+      SET(${VAR} "" CACHE INTERNAL "Test ${VAR}")
+      FILE(APPEND ${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/CMakeError.log
+        "Performing C++ SOURCE FILE Test ${VAR} failed with the following output:\n"
+        "${OUTPUT}\n"
+        "Source file was:\n${SOURCE}\n")
+    ENDIF(${VAR})
+  ENDIF("${VAR}" MATCHES "^${VAR}$")
+ENDMACRO(CHECK_CXX_SOURCE_COMPILES)
diff --git a/mgizapp/cmake/FindTR1.cmake b/mgizapp/cmake/FindTR1.cmake
new file mode 100644
index 0000000..3ef2b44
--- /dev/null
+++ b/mgizapp/cmake/FindTR1.cmake
@@ -0,0 +1,84 @@
+# Check availability of C++ TR1 contents.
+# Sets the following variables:
+#  TR1_SHARED_PTR_FOUND                    -- std::tr1::shared_ptr available
+#  TR1_SHARED_PTR_USE_TR1_MEMORY           -- #include <tr1/memory>
+#  TR1_SHARED_PTR_USE_MEMORY               -- #include <memory>
+#  TR1_UNORDERED_MAP_FOUND                 -- std::tr1::unordered_map available
+#  TR1_UNORDERED_MAP_USE_TR1_UNORDERED_MAP -- #include <tr1/unordered_map>
+#  TR1_UNORDERED_MAP_USE_UNORDERED_MAP     -- #include <unordered_map>
+
+# We need to have at least this version to support the VERSION_LESS argument to 'if' (2.6.2) and unset (2.6.3)
+cmake_policy(PUSH)
+  cmake_minimum_required(VERSION 2.6.3)
+cmake_policy(POP)
+
+INCLUDE(${PROJECT_SOURCE_DIR}/cmake/CheckCXXSourceCompiles.cmake)
+# ---------------------------------------------------------------------------
+# std::tr1::shared_ptr
+# ---------------------------------------------------------------------------
+
+check_cxx_source_compiles(
+  "
+  #include <tr1/memory>
+  int main() {
+    std::tr1::shared_ptr<int> ptr;
+    return 0;
+  }
+  "
+TR1_SHARED_PTR_USE_TR1_MEMORY)
+check_cxx_source_compiles(
+  "
+  #include <memory>
+  int main() {
+    std::tr1::shared_ptr<int> ptr;
+    return 0;
+  }
+  "
+TR1_SHARED_PTR_USE_MEMORY)
+
+set (TR1_SHARED_PTR_FOUND FALSE)
+if (TR1_SHARED_PTR_USE_TR1_MEMORY)
+  set (TR1_SHARED_PTR_FOUND TRUE)
+endif (TR1_SHARED_PTR_USE_TR1_MEMORY)
+if (TR1_SHARED_PTR_USE_MEMORY)
+  set (TR1_SHARED_PTR_FOUND TRUE)
+endif (TR1_SHARED_PTR_USE_MEMORY)
+
+mark_as_advanced (TR1_SHARED_PTR_FOUND)
+mark_as_advanced (TR1_SHARED_PTR_USE_TR1_MEMORY)
+mark_as_advanced (TR1_SHARED_PTR_USE_MEMORY)
+
+# ---------------------------------------------------------------------------
+# std::tr1::unordered_map
+# ---------------------------------------------------------------------------
+
+check_cxx_source_compiles(
+  "
+  #include <tr1/unordered_map>
+  int main() {
+    std::tr1::unordered_map<int, int> m;
+    return 0;
+  }
+  "
+  TR1_UNORDERED_MAP_USE_TR1_UNORDERED_MAP)
+check_cxx_source_compiles(
+  "
+  #include <unordered_map>
+  int main() {
+    std::tr1::unordered_map<int, int> m;
+    return 0;
+  }
+  "
+  TR1_UNORDERED_MAP_USE_UNORDERED_MAP)
+
+set (TR1_UNORDERED_MAP_FOUND FALSE)
+if (TR1_UNORDERED_MAP_USE_TR1_UNORDERED_MAP)
+  set (TR1_UNORDERED_MAP_FOUND TRUE)
+endif (TR1_UNORDERED_MAP_USE_TR1_UNORDERED_MAP)
+if (TR1_UNORDERED_MAP_USE_UNORDERED_MAP)
+  set (TR1_UNORDERED_MAP_FOUND TRUE)
+endif (TR1_UNORDERED_MAP_USE_UNORDERED_MAP)
+
+mark_as_advanced (TR1_UNORDERED_MAP_FOUND)
+mark_as_advanced (TR1_UNORDERED_MAP_USE_TR1_UNORDERED_MAP)
+mark_as_advanced (TR1_UNORDERED_MAP_USE_UNORDERED_MAP)
diff --git a/mgizapp/cmake/SRLBoost.cmake b/mgizapp/cmake/SRLBoost.cmake
new file mode 100644
index 0000000..af3a956
--- /dev/null
+++ b/mgizapp/cmake/SRLBoost.cmake
@@ -0,0 +1,21 @@
+#Locate Boost libs. Windows users: make sure BOOST_ROOT and BOOST_PATH are set correctly on your environment.
+#See the site FAQ for more details.
+
+#GET_BOOST_INCLUDE_PATH(path libs): stores Boost_INCLUDE_DIRS in <path> and Boost_LIBRARIES in <libs>; it reads Boost_FOUND but does not call FIND_PACKAGE itself.
+MACRO (GET_BOOST_INCLUDE_PATH path libs)
+  #todo: allow this to fall back on a local distributed copy, so user doesn't have to d/l Boost separately
+
+  #todo: limit Boost version?
+  #todo: use COMPONENTS threads to locate boost_threads without breaking the current support
+  IF(Boost_FOUND)
+    IF (NOT _boost_IN_CACHE)
+      MESSAGE( "Boost found" )
+      message(STATUS "Boost_INCLUDE_DIR : ${Boost_INCLUDE_DIR}")
+    ENDIF (NOT _boost_IN_CACHE)
+    SET(${path} ${Boost_INCLUDE_DIRS} )
+    SET(${libs} ${Boost_LIBRARIES} )
+    link_directories ( ${Boost_LIBRARY_DIRS} )
+  ELSE()
+    MESSAGE(FATAL_ERROR "Boost not found, please set the BOOST_ROOT environment variable " )
+  ENDIF()
+ENDMACRO (GET_BOOST_INCLUDE_PATH path libs)
diff --git a/mgizapp/scripts/CMakeLists.txt b/mgizapp/scripts/CMakeLists.txt
new file mode 100644
index 0000000..afa694e
--- /dev/null
+++ b/mgizapp/scripts/CMakeLists.txt
@@ -0,0 +1,4 @@
+INSTALL(PROGRAMS force-align-moses.sh giza2bal.pl merge_alignment.py plain2snt-hasvcb.py sntpostproc.py force-align-moses-old.sh run.sh
+  DESTINATION scripts
+  PERMISSIONS OWNER_EXECUTE OWNER_READ OWNER_WRITE
+  )
diff --git a/mgizapp/scripts/force-align-moses-old.sh b/mgizapp/scripts/force-align-moses-old.sh
new file mode 100755
index 0000000..fd4cf12
--- /dev/null
+++ b/mgizapp/scripts/force-align-moses-old.sh
@@ -0,0 +1,48 @@
+#!/usr/bin/env bash
+
+MGIZA=${QMT_HOME}/bin/mgiza
+
+if [ $# -lt 4 ]; then
+ echo "OK, this is simple, put me into your Moses training directory, link your source/target corpus" 1>&2
+ echo "and run " $0 " PREFIX src_tag tgt_tag root-dir." 1>&2
+ echo "and get force-aligned data: root-dir/giza.[src-tgt|tgt-src]/*.A3.final.* " 1>&2
+ echo "make sure I can find PREFIX.src_tag-tgt_tag and PREFIX.tgt_tag-src_tag, and \${QMT_HOME} is set" 1>&2
+ exit
+fi
+
+PRE=$1
+SRC=$2
+TGT=$3
+ROOT=$4
+
+mkdir -p $ROOT/giza.${SRC}-${TGT}
+mkdir -p $ROOT/giza.${TGT}-${SRC}
+mkdir -p $ROOT/corpus
+
+echo "Generating corpus file " 1>&2
+
+${QMT_HOME}/scripts/plain2snt-hasvcb.py corpus/$SRC.vcb corpus/$TGT.vcb ${PRE}.${SRC} ${PRE}.${TGT} $ROOT/corpus/${TGT}-${SRC}.snt $ROOT/corpus/${SRC}-${TGT}.snt $ROOT/corpus/$SRC.vcb $ROOT/corpus/$TGT.vcb
+
+ln -sf $PWD/corpus/$SRC.vcb.classes $PWD/corpus/$TGT.vcb.classes $ROOT/corpus/
+
+echo "Generating co-occurrence file " 1>&2
+
+${QMT_HOME}/bin/snt2cooc $ROOT/giza.${TGT}-${SRC}/$TGT-${SRC}.cooc $ROOT/corpus/$SRC.vcb $ROOT/corpus/$TGT.vcb $ROOT/corpus/${TGT}-${SRC}.snt
+${QMT_HOME}/bin//snt2cooc $ROOT/giza.${SRC}-${TGT}/$SRC-${TGT}.cooc $ROOT/corpus/$TGT.vcb $ROOT/corpus/$SRC.vcb $ROOT/corpus/${SRC}-${TGT}.snt
+
+echo "Running force alignment " 1>&2
+
+$MGIZA giza.$TGT-$SRC/$TGT-$SRC.gizacfg -c $ROOT/corpus/$TGT-$SRC.snt -o $ROOT/giza.${TGT}-${SRC}/$TGT-${SRC} \
+-s $ROOT/corpus/$SRC.vcb -t $ROOT/corpus/$TGT.vcb -m1 0 -m2 0 -mh 0 -coocurrence $ROOT/giza.${TGT}-${SRC}/$TGT-${SRC}.cooc \
+-restart 11 -previoust giza.$TGT-$SRC/$TGT-$SRC.t3.final \
+-previousa giza.$TGT-$SRC/$TGT-$SRC.a3.final -previousd giza.$TGT-$SRC/$TGT-$SRC.d3.final \
+-previousn giza.$TGT-$SRC/$TGT-$SRC.n3.final -previousd4 giza.$TGT-$SRC/$TGT-$SRC.d4.final \
+-previousd42 giza.$TGT-$SRC/$TGT-$SRC.D4.final -m3 0 -m4 1
+
+$MGIZA giza.$SRC-$TGT/$SRC-$TGT.gizacfg -c $ROOT/corpus/$SRC-$TGT.snt -o $ROOT/giza.${SRC}-${TGT}/$SRC-${TGT} \
+-s $ROOT/corpus/$TGT.vcb -t $ROOT/corpus/$SRC.vcb -m1 0 -m2 0 -mh 0 -coocurrence $ROOT/giza.${SRC}-${TGT}/$SRC-${TGT}.cooc \
+-restart 11 -previoust giza.$SRC-$TGT/$SRC-$TGT.t3.final \
+-previousa giza.$SRC-$TGT/$SRC-$TGT.a3.final -previousd giza.$SRC-$TGT/$SRC-$TGT.d3.final \
+-previousn giza.$SRC-$TGT/$SRC-$TGT.n3.final -previousd4 giza.$SRC-$TGT/$SRC-$TGT.d4.final \
+-previousd42 giza.$SRC-$TGT/$SRC-$TGT.D4.final -m3 0 -m4 1
+
diff --git a/mgizapp/scripts/force-align-moses.sh b/mgizapp/scripts/force-align-moses.sh
index fd4cf12..ac95bcb 100755
--- a/mgizapp/scripts/force-align-moses.sh
+++ b/mgizapp/scripts/force-align-moses.sh
@@ -14,33 +14,34 @@ PRE=$1
 SRC=$2
 TGT=$3
 ROOT=$4
+NUM=$5
 
-mkdir -p $ROOT/giza.${SRC}-${TGT}
-mkdir -p $ROOT/giza.${TGT}-${SRC}
-mkdir -p $ROOT/corpus
+mkdir -p $ROOT/giza-inverse.${NUM}
+mkdir -p $ROOT/giza.${NUM}
+mkdir -p $ROOT/prepared.${NUM}
 
 echo "Generating corpus file " 1>&2
 
-${QMT_HOME}/scripts/plain2snt-hasvcb.py corpus/$SRC.vcb corpus/$TGT.vcb ${PRE}.${SRC} ${PRE}.${TGT} $ROOT/corpus/${TGT}-${SRC}.snt $ROOT/corpus/${SRC}-${TGT}.snt $ROOT/corpus/$SRC.vcb $ROOT/corpus/$TGT.vcb
+${QMT_HOME}/scripts/plain2snt-hasvcb.py prepared.${NUM}/$SRC.vcb prepared.${NUM}/$TGT.vcb ${PRE}.${SRC} ${PRE}.${TGT} $ROOT/prepared.${NUM}/${TGT}-${SRC}.snt $ROOT/prepared.${NUM}/${SRC}-${TGT}.snt $ROOT/prepared.${NUM}/$SRC.vcb $ROOT/prepared.${NUM}/$TGT.vcb
 
-ln -sf $PWD/corpus/$SRC.vcb.classes $PWD/corpus/$TGT.vcb.classes $ROOT/corpus/
+ln -sf $PWD/prepared.${NUM}/$SRC.vcb.classes $PWD/prepared.${NUM}/$TGT.vcb.classes $ROOT/prepared.${NUM}/
 
 echo "Generating co-occurrence file " 1>&2
 
-${QMT_HOME}/bin/snt2cooc $ROOT/giza.${TGT}-${SRC}/$TGT-${SRC}.cooc $ROOT/corpus/$SRC.vcb $ROOT/corpus/$TGT.vcb $ROOT/corpus/${TGT}-${SRC}.snt
-${QMT_HOME}/bin//snt2cooc $ROOT/giza.${SRC}-${TGT}/$SRC-${TGT}.cooc $ROOT/corpus/$TGT.vcb $ROOT/corpus/$SRC.vcb $ROOT/corpus/${SRC}-${TGT}.snt
+${QMT_HOME}/bin/snt2cooc $ROOT/giza.${NUM}/$TGT-${SRC}.cooc $ROOT/prepared.${NUM}/$SRC.vcb $ROOT/prepared.${NUM}/$TGT.vcb $ROOT/prepared.${NUM}/${TGT}-${SRC}.snt
+${QMT_HOME}/bin//snt2cooc $ROOT/giza-inverse.${NUM}/$SRC-${TGT}.cooc $ROOT/prepared.${NUM}/$TGT.vcb $ROOT/prepared.${NUM}/$SRC.vcb $ROOT/prepared.${NUM}/${SRC}-${TGT}.snt
 
 echo "Running force alignment " 1>&2
 
-$MGIZA giza.$TGT-$SRC/$TGT-$SRC.gizacfg -c $ROOT/corpus/$TGT-$SRC.snt -o $ROOT/giza.${TGT}-${SRC}/$TGT-${SRC} \
--s $ROOT/corpus/$SRC.vcb -t $ROOT/corpus/$TGT.vcb -m1 0 -m2 0 -mh 0 -coocurrence $ROOT/giza.${TGT}-${SRC}/$TGT-${SRC}.cooc \
+$MGIZA giza.$TGT-$SRC/$TGT-$SRC.gizacfg -c $ROOT/prepared.${NUM}/$TGT-$SRC.snt -o $ROOT/giza.${NUM}/$TGT-${SRC} \
+-s $ROOT/prepared.${NUM}/$SRC.vcb -t $ROOT/prepared.${NUM}/$TGT.vcb -m1 0 -m2 0 -mh 0 -coocurrence $ROOT/giza.${NUM}/$TGT-${SRC}.cooc \
 -restart 11 -previoust giza.$TGT-$SRC/$TGT-$SRC.t3.final \
 -previousa giza.$TGT-$SRC/$TGT-$SRC.a3.final -previousd giza.$TGT-$SRC/$TGT-$SRC.d3.final \
 -previousn giza.$TGT-$SRC/$TGT-$SRC.n3.final -previousd4 giza.$TGT-$SRC/$TGT-$SRC.d4.final \
 -previousd42 giza.$TGT-$SRC/$TGT-$SRC.D4.final -m3 0 -m4 1
 
-$MGIZA giza.$SRC-$TGT/$SRC-$TGT.gizacfg -c $ROOT/corpus/$SRC-$TGT.snt -o $ROOT/giza.${SRC}-${TGT}/$SRC-${TGT} \
--s $ROOT/corpus/$TGT.vcb -t $ROOT/corpus/$SRC.vcb -m1 0 -m2 0 -mh 0 -coocurrence $ROOT/giza.${SRC}-${TGT}/$SRC-${TGT}.cooc \
+$MGIZA giza.$SRC-$TGT/$SRC-$TGT.gizacfg -c $ROOT/prepared.${NUM}/$SRC-$TGT.snt -o $ROOT/giza-inverse.${NUM}/$SRC-${TGT} \
+-s $ROOT/prepared.${NUM}/$TGT.vcb -t $ROOT/prepared.${NUM}/$SRC.vcb -m1 0 -m2 0 -mh 0 -coocurrence $ROOT/giza-inverse.${NUM}/$SRC-${TGT}.cooc \
 -restart 11 -previoust giza.$SRC-$TGT/$SRC-$TGT.t3.final \
 -previousa giza.$SRC-$TGT/$SRC-$TGT.a3.final -previousd giza.$SRC-$TGT/$SRC-$TGT.d3.final \
 -previousn giza.$SRC-$TGT/$SRC-$TGT.n3.final -previousd4 giza.$SRC-$TGT/$SRC-$TGT.d4.final \
diff --git a/mgizapp/scripts/run.sh b/mgizapp/scripts/run.sh
new file mode 100755
index 0000000..2bb3972
--- /dev/null
+++ b/mgizapp/scripts/run.sh
@@ -0,0 +1,23 @@
+#!/usr/bin/env bash
+PRE=test
+SRC=fr
+TGT=en
+NUM=1
+SCRIPT_DIR=/opt/AO/sw/edinburgh-code/scripts-20110926-1425
+# NOTE: SCRIPT_DIR and QMT_HOME are machine-specific absolute paths; adjust them before running.
+export QMT_HOME=/root/workspace/mgizapp
+
+rm -rf out
+
+$QMT_HOME/scripts/force-align-moses.sh $PRE $SRC $TGT out $NUM
+
+echo "FINISHED forced alignment"
+
+$SCRIPT_DIR/../merge_alignment.py out/giza-inverse.$NUM/$SRC-$TGT.A3.final.part* | gzip -c > out/giza-inverse.$NUM/$SRC-$TGT.A3.final.gz
+$SCRIPT_DIR/../merge_alignment.py out/giza.$NUM/$TGT-$SRC.A3.final.part* | gzip -c > out/giza.$NUM/$TGT-$SRC.A3.final.gz
+
+$SCRIPT_DIR/training/symal/giza2bal.pl -d "gzip -cd out/giza.$NUM/$TGT-$SRC.A3.final.gz" -i "gzip -cd out/giza-inverse.$NUM/$SRC-$TGT.A3.final.gz" | $SCRIPT_DIR/training/symal/symal -alignment="grow" -diagonal="yes" -final="yes" -both="yes" > out/aligned.$NUM.grow-diag-final-and
+
+echo "FINISHED giza2bal & symal"
+
+$SCRIPT_DIR/training/phrase-extract/extract $PRE.$TGT $PRE.$SRC out/aligned.$NUM.grow-diag-final-and out/extract.$NUM 7 orientation --model wbe-msd
diff --git a/mgizapp/src/CMakeLists.txt b/mgizapp/src/CMakeLists.txt
new file mode 100644
index 0000000..b4b4f65
--- /dev/null
+++ b/mgizapp/src/CMakeLists.txt
@@ -0,0 +1,105 @@
+
+# Locate the thread library; CMAKE_THREAD_LIBS_INIT is linked into the executables below
+FIND_PACKAGE(Threads)
+
+# Set output directories
+SET(EXECUTABLE_OUTPUT_PATH ${PROJECT_BINARY_DIR}/bin)
+SET(LIBRARY_OUTPUT_PATH ${PROJECT_BINARY_DIR}/lib)
+
+# NOTE(review): both -DNDEBUG and -DDEBUG are defined below - confirm that this is intended.
+ADD_DEFINITIONS("-DNDEBUG")
+ADD_DEFINITIONS("-DWORDINDEX_WITH_4_BYTE")
+ADD_DEFINITIONS("-DBINARY_SEARCH_FOR_TTABLE")
+ADD_DEFINITIONS("-DDEBUG")
+ADD_DEFINITIONS("-Wno-deprecated")
+ADD_DEFINITIONS("-Wno-write-strings")
+
+SET( LIBMGIZA_SRC
+  alignment.cpp alignment.h
+  AlignTables.cpp AlignTables.h
+  Array2.h Array4.h
+  Array.h ATables.cpp
+  ATables.h cmd.c
+  cmd.h collCounts.cpp
+  collCounts.h common.h
+  D4Tables.h
+  D5Tables.h defs.h
+  Dictionary.cpp Dictionary.h
+  file_spec.h FlexArray.h
+  ForwardBackward.cpp ForwardBackward.h
+  getSentence.cpp getSentence.h
+  Globals.h hmm.cpp
+  hmm.h
+  HMMTables.cpp HMMTables.h
+  logprob.cpp logprob.h
+  model1.cpp
+  model1.h model2.cpp
+  model2.h model2to3.cpp
+  model345-peg.cpp model3.cpp
+  model3.h model3_viterbi.cpp
+  model3_viterbi_with_tricks.cpp MoveSwapMatrix.cpp
+  MoveSwapMatrix.h myassert.cpp
+  myassert.h mymath.h
+  mystl.h NTables.cpp
+  NTables.h Parameter.cpp
+  Parameter.h parse.cpp
+  Perplexity.cpp Perplexity.h
+  Pointer.h
+  reports.cpp SetArray.cpp
+  SetArray.h
+  syncObj.h transpair_model1.h
+  transpair_model2.h transpair_model3.cpp
+  transpair_model3.h transpair_model4.cpp
+  transpair_model4.h transpair_model5.cpp
+  transpair_model5.h transpair_modelhmm.h
+  ttableDiff.hpp TTables.cpp
+  TTables.h types.h
+  utility.cpp utility.h
+  Vector.h vocab.cpp
+  vocab.h WordClasses.h
+)
+
+ADD_LIBRARY(mgiza STATIC ${LIBMGIZA_SRC})
+INCLUDE_DIRECTORIES( ${PROJECT_SOURCE_DIR} )
+INCLUDE_DIRECTORIES( ${PROJECT_SOURCE_DIR}/src/ )
+LINK_DIRECTORIES ( ${LIBRARY_OUTPUT_PATH} )
+
+SET( MGIZA_SRC main.cpp )
+
+ADD_EXECUTABLE( mgizapp ${MGIZA_SRC} )
+# The force-align scripts invoke ${QMT_HOME}/bin/mgiza, so build/install the executable under that name.
+SET_TARGET_PROPERTIES( mgizapp PROPERTIES OUTPUT_NAME mgiza )
+
+TARGET_LINK_LIBRARIES (
+  mgizapp
+  mgiza
+  ${Boost_LIBRARIES}
+  ${CMAKE_THREAD_LIBS_INIT}
+  )
+
+
+ADD_EXECUTABLE(snt2cooc snt2cooc.cpp)
+ADD_EXECUTABLE(snt2plain snt2plain.cpp)
+ADD_EXECUTABLE(plain2snt plain2snt.cpp)
+ADD_EXECUTABLE(symal symal.cpp cmd.c)
+ADD_EXECUTABLE(hmmnorm hmmnorm.cxx)
+ADD_EXECUTABLE(d4norm d4norm.cxx)
+TARGET_LINK_LIBRARIES (
+  hmmnorm
+  mgiza
+  ${Boost_LIBRARIES}
+  ${CMAKE_THREAD_LIBS_INIT}
+  )
+TARGET_LINK_LIBRARIES (
+  d4norm
+  mgiza
+  ${Boost_LIBRARIES}
+  ${CMAKE_THREAD_LIBS_INIT}
+  )
+
+INSTALL(TARGETS mgiza mgizapp snt2cooc snt2plain plain2snt symal hmmnorm d4norm
+  RUNTIME DESTINATION bin
+  LIBRARY DESTINATION lib
+  ARCHIVE DESTINATION lib
+  )
+
diff --git a/mgizapp/src/mkcls/CMakeLists.txt b/mgizapp/src/mkcls/CMakeLists.txt
new file mode 100644
index 0000000..3554fb7
--- /dev/null
+++ b/mgizapp/src/mkcls/CMakeLists.txt
@@ -0,0 +1,72 @@
+
+# Locate the thread library. NOTE(review): nothing in this file links CMAKE_THREAD_LIBS_INIT - confirm it is needed.
+FIND_PACKAGE(Threads)
+
+# Set output directories
+SET(EXECUTABLE_OUTPUT_PATH ${PROJECT_BINARY_DIR}/bin)
+SET(LIBRARY_OUTPUT_PATH ${PROJECT_BINARY_DIR}/lib)
+
+ADD_DEFINITIONS("-DNDEBUG")
+ADD_DEFINITIONS("-Wno-deprecated")
+ADD_DEFINITIONS("-Wno-write-strings")
+
+SET( MKCLS_SRC
+  Array.h
+  FixedArray.h
+  FlexArray.h
+  GDAOptimization.cpp
+  GDAOptimization.h
+  general.cpp
+  general.h
+  HCOptimization.cpp
+  HCOptimization.h
+  IterOptimization.cpp
+  IterOptimization.h
+  KategProblem.cpp
+  KategProblem.h
+  KategProblemKBC.cpp
+  KategProblemKBC.h
+  KategProblemTest.cpp
+  KategProblemTest.h
+  KategProblemWBC.cpp
+  KategProblemWBC.h
+  mkcls.cpp
+  my.h
+  myassert.h
+  myleda.h
+  MYOptimization.cpp
+  MYOptimization.h
+  mystl.h
+  Optimization.cpp
+  Optimization.h
+  Problem.cpp
+  Problem.h
+  ProblemTest.cpp
+  ProblemTest.h
+  RRTOptimization.cpp
+  RRTOptimization.h
+  SAOptimization.cpp
+  SAOptimization.h
+  StatVar.cpp
+  StatVar.h
+  TAOptimization.cpp
+  TAOptimization.h
+
+  )
+
+ADD_EXECUTABLE(mkcls ${MKCLS_SRC})
+INCLUDE_DIRECTORIES( ${PROJECT_SOURCE_DIR} )
+INCLUDE_DIRECTORIES( ${PROJECT_SOURCE_DIR}/src/ )
+INCLUDE_DIRECTORIES( ${PROJECT_SOURCE_DIR}/src/mkcls )
+LINK_DIRECTORIES ( ${LIBRARY_OUTPUT_PATH} )
+
+
+# Install the mkcls executable into <prefix>/bin.
+INSTALL(TARGETS mkcls
+  RUNTIME DESTINATION bin
+  LIBRARY DESTINATION lib
+  ARCHIVE DESTINATION lib
+  )
+
+
+
-- 
cgit v1.2.3