Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mgiza.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authoredwardgao <edwardgao@9a26d1b7-1c8f-445c-8fdd-6576f508279d>2011-12-30 06:30:19 +0400
committeredwardgao <edwardgao@9a26d1b7-1c8f-445c-8fdd-6576f508279d>2011-12-30 06:30:19 +0400
commit23a14e354c21af641a91055deee97d8c86ca7961 (patch)
tree1617871e945c8f9b509497a3d418534a6aba8c57
parentf6c27d0205d300103a846a836ab8d886a4d2fe7d (diff)
Add support of Cmake
-rw-r--r--mgizapp/CMakeLists.txt67
-rw-r--r--mgizapp/cmake/CheckCXXSourceCompiles.cmake60
-rw-r--r--mgizapp/cmake/FindTR1.cmake84
-rw-r--r--mgizapp/cmake/SRLBoost.cmake21
-rw-r--r--mgizapp/scripts/CMakeLists.txt4
-rwxr-xr-xmgizapp/scripts/force-align-moses-old.sh48
-rwxr-xr-xmgizapp/scripts/force-align-moses.sh23
-rwxr-xr-xmgizapp/scripts/run.sh23
-rw-r--r--mgizapp/src/CMakeLists.txt105
-rw-r--r--mgizapp/src/mkcls/CMakeLists.txt72
10 files changed, 496 insertions, 11 deletions
diff --git a/mgizapp/CMakeLists.txt b/mgizapp/CMakeLists.txt
new file mode 100644
index 0000000..6f9955f
--- /dev/null
+++ b/mgizapp/CMakeLists.txt
@@ -0,0 +1,67 @@
+# CMake 2.6+ is recommended for its improved Boost module. Declared before
+# PROJECT() so that policy defaults are in place when the project is set up.
+CMAKE_MINIMUM_REQUIRED(VERSION 2.6.0 FATAL_ERROR)
+PROJECT (srlextract)
+
+SET(MGIZA_VERSION_MAJOR "0")
+SET(MGIZA_VERSION_MINOR "6")
+SET(MGIZA_VERSION_PATCH "7")
+
+# Integer form of the version: major * 10000 + minor * 100 + patch.
+MATH(EXPR MGIZA_INT_VERSION "(${MGIZA_VERSION_MAJOR} * 10000) + (${MGIZA_VERSION_MINOR} * 100) + (${MGIZA_VERSION_PATCH} * 1)" )
+SET(MGIZA_VERSION "${MGIZA_VERSION_MAJOR}.${MGIZA_VERSION_MINOR}.${MGIZA_VERSION_PATCH}")
+
+IF(COMMAND cmake_policy)
+ cmake_policy(SET CMP0003 NEW)
+ENDIF(COMMAND cmake_policy)
+# Default to an absolute <build>/inst prefix when the user did not choose one.
+IF(CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT)
+ SET(CMAKE_INSTALL_PREFIX
+ "${CMAKE_BINARY_DIR}/inst" CACHE PATH "MGIZA install prefix" FORCE
+ )
+ MESSAGE(STATUS "You have not set the install dir, default to '${CMAKE_BINARY_DIR}/inst', if
+ you want to set it, use cmake -DCMAKE_INSTALL_PREFIX to do so")
+ENDIF(CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT)
+
+#set various platform specific global options
+IF(WIN32)
+ SET(CMAKE_DEBUG_POSTFIX "d")
+ENDIF(WIN32)
+
+INCLUDE(cmake/FindTR1.cmake)
+
+# include specific modules
+SET(CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/cmake")
+
+
+# Find Boost library, specify
+# BOOST_ROOT=/e/programs/boost_1_35_0
+# BOOST_LIBRARYDIR=$BOOST_ROOT/stage/lib
+
+set(Boost_USE_STATIC_LIBS ON)
+set(Boost_USE_MULTITHREADED ON)
+set(Boost_USE_STATIC_RUNTIME OFF)
+
+# NOTE(review): no COMPONENTS are requested, so Boost_LIBRARIES stays empty - confirm that is intended.
+FIND_PACKAGE( Boost 1.46)
+
+IF(Boost_FOUND)
+ IF (NOT _boost_IN_CACHE)
+ MESSAGE( "Boost found" )
+ message(STATUS "Boost_INCLUDE_DIR : ${Boost_INCLUDE_DIR}")
+ ENDIF (NOT _boost_IN_CACHE)
+ LINK_DIRECTORIES ( ${Boost_LIBRARY_DIRS} )
+ INCLUDE_DIRECTORIES ( ${Boost_INCLUDE_DIRS} )
+ELSE()
+ MESSAGE(FATAL_ERROR "Boost not found, please set the BOOST_ROOT and BOOST_LIBRARYDIR environment variables " )
+ENDIF()
+
+ADD_SUBDIRECTORY (src)
+ADD_SUBDIRECTORY (src/mkcls)
+ADD_SUBDIRECTORY (scripts)
+
+
+
+
+
+
diff --git a/mgizapp/cmake/CheckCXXSourceCompiles.cmake b/mgizapp/cmake/CheckCXXSourceCompiles.cmake
new file mode 100644
index 0000000..3921c89
--- /dev/null
+++ b/mgizapp/cmake/CheckCXXSourceCompiles.cmake
@@ -0,0 +1,60 @@
+# - Check if the source code provided in the SOURCE argument compiles.
+# CHECK_CXX_SOURCE_COMPILES(SOURCE VAR)
+# - macro which checks if the source code compiles
+# SOURCE - source code to try to compile
+# VAR - cache variable storing the result: 1 if the source compiled, "" if not
+#
+# The following variables may be set before calling this macro to
+# modify the way the check is run:
+#
+# CMAKE_REQUIRED_FLAGS = string of compile command line flags
+# CMAKE_REQUIRED_DEFINITIONS = list of macros to define (-DFOO=bar)
+# CMAKE_REQUIRED_INCLUDES = list of include directories
+# CMAKE_REQUIRED_LIBRARIES = list of libraries to link
+# The compiler output is appended to CMakeOutput.log or CMakeError.log in the build tree.
+MACRO(CHECK_CXX_SOURCE_COMPILES SOURCE VAR)
+ IF("${VAR}" MATCHES "^${VAR}$") # presumably the legacy "result not known yet" guard - TODO confirm
+ SET(MACRO_CHECK_FUNCTION_DEFINITIONS
+ "-D${VAR} ${CMAKE_REQUIRED_FLAGS}") # defines VAR itself plus the caller's extra flags
+ IF(CMAKE_REQUIRED_LIBRARIES)
+ SET(CHECK_CXX_SOURCE_COMPILES_ADD_LIBRARIES
+ "-DLINK_LIBRARIES:STRING=${CMAKE_REQUIRED_LIBRARIES}")
+ ELSE(CMAKE_REQUIRED_LIBRARIES)
+ SET(CHECK_CXX_SOURCE_COMPILES_ADD_LIBRARIES) # clear any value left by an earlier call
+ ENDIF(CMAKE_REQUIRED_LIBRARIES)
+ IF(CMAKE_REQUIRED_INCLUDES)
+ SET(CHECK_CXX_SOURCE_COMPILES_ADD_INCLUDES
+ "-DINCLUDE_DIRECTORIES:STRING=${CMAKE_REQUIRED_INCLUDES}")
+ ELSE(CMAKE_REQUIRED_INCLUDES)
+ SET(CHECK_CXX_SOURCE_COMPILES_ADD_INCLUDES) # clear any value left by an earlier call
+ ENDIF(CMAKE_REQUIRED_INCLUDES)
+ FILE(WRITE "${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/CMakeTmp/src.cxx"
+ "${SOURCE}\n") # the snippet is written to a scratch file that TRY_COMPILE builds
+
+ MESSAGE(STATUS "Performing Test ${VAR}")
+ TRY_COMPILE(${VAR}
+ ${CMAKE_BINARY_DIR}
+ ${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/CMakeTmp/src.cxx
+ COMPILE_DEFINITIONS ${CMAKE_REQUIRED_DEFINITIONS}
+ CMAKE_FLAGS -DCOMPILE_DEFINITIONS:STRING=${MACRO_CHECK_FUNCTION_DEFINITIONS}
+ "${CHECK_CXX_SOURCE_COMPILES_ADD_LIBRARIES}"
+ "${CHECK_CXX_SOURCE_COMPILES_ADD_INCLUDES}"
+ OUTPUT_VARIABLE OUTPUT) # OUTPUT receives the build log used in the reports below
+ IF(${VAR}) # TRY_COMPILE stored its result in VAR
+ SET(${VAR} 1 CACHE INTERNAL "Test ${VAR}")
+ MESSAGE(STATUS "Performing Test ${VAR} - Success")
+ FILE(APPEND ${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/CMakeOutput.log
+ "Performing C++ SOURCE FILE Test ${VAR} succeded with the following output:\n"
+ "${OUTPUT}\n"
+ "Source file was:\n${SOURCE}\n")
+ ELSE(${VAR})
+ MESSAGE(STATUS "Performing Test ${VAR} - Failed")
+ SET(${VAR} "" CACHE INTERNAL "Test ${VAR}") # failure is cached as an empty string
+ FILE(APPEND ${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/CMakeError.log
+ "Performing C++ SOURCE FILE Test ${VAR} failed with the following output:\n"
+ "${OUTPUT}\n"
+ "Source file was:\n${SOURCE}\n")
+ ENDIF(${VAR})
+ ENDIF("${VAR}" MATCHES "^${VAR}$")
+ENDMACRO(CHECK_CXX_SOURCE_COMPILES)
+
diff --git a/mgizapp/cmake/FindTR1.cmake b/mgizapp/cmake/FindTR1.cmake
new file mode 100644
index 0000000..3ef2b44
--- /dev/null
+++ b/mgizapp/cmake/FindTR1.cmake
@@ -0,0 +1,84 @@
+# Check availability of C++ TR1 contents.
+#
+# Sets the following variables:
+#
+# TR1_SHARED_PTR_FOUND -- std::tr1::shared_ptr<T> available
+# TR1_SHARED_PTR_USE_TR1_MEMORY -- #include <tr1/memory>
+# TR1_SHARED_PTR_USE_MEMORY -- #include <memory>
+# TR1_UNORDERED_MAP_FOUND -- std::tr1::unordered_map<K, V> available
+# TR1_UNORDERED_MAP_USE_TR1_UNORDERED_MAP -- #include <tr1/unordered_map>
+# TR1_UNORDERED_MAP_USE_UNORDERED_MAP -- #include <unordered_map>
+
+# We need to have at least this version to support the VERSION_LESS argument to 'if' (2.6.2) and unset (2.6.3)
+cmake_policy(PUSH)
+ cmake_minimum_required(VERSION 2.6.3)
+cmake_policy(POP)
+
+INCLUDE(${PROJECT_SOURCE_DIR}/cmake/CheckCXXSourceCompiles.cmake)
+# ---------------------------------------------------------------------------
+# std::tr1::shared_ptr<T>
+# ---------------------------------------------------------------------------
+
+check_cxx_source_compiles(
+ "
+ #include <tr1/memory>
+ int main() {
+ std::tr1::shared_ptr<int> ptr;
+ return 0;
+ }
+ "
+TR1_SHARED_PTR_USE_TR1_MEMORY)
+check_cxx_source_compiles(
+ "
+ #include <memory>
+ int main() {
+ std::tr1::shared_ptr<int> ptr;
+ return 0;
+ }
+ "
+TR1_SHARED_PTR_USE_MEMORY)
+
+# Start from an explicit FALSE so the documented result is always defined.
+set (TR1_SHARED_PTR_FOUND FALSE)
+if (TR1_SHARED_PTR_USE_TR1_MEMORY OR TR1_SHARED_PTR_USE_MEMORY)
+ set (TR1_SHARED_PTR_FOUND TRUE)
+endif ()
+
+mark_as_advanced (TR1_SHARED_PTR_FOUND)
+mark_as_advanced (TR1_SHARED_PTR_USE_TR1_MEMORY)
+mark_as_advanced (TR1_SHARED_PTR_USE_MEMORY)
+
+# ---------------------------------------------------------------------------
+# std::tr1::unordered_map<K, V>
+# ---------------------------------------------------------------------------
+
+check_cxx_source_compiles(
+ "
+ #include <tr1/unordered_map>
+ int main() {
+ std::tr1::unordered_map<int, int> m;
+ return 0;
+ }
+ "
+ TR1_UNORDERED_MAP_USE_TR1_UNORDERED_MAP)
+check_cxx_source_compiles(
+ "
+ #include <unordered_map>
+ int main() {
+ std::tr1::unordered_map<int, int> m;
+ return 0;
+ }
+ "
+ TR1_UNORDERED_MAP_USE_UNORDERED_MAP)
+
+# Start from an explicit FALSE so the documented result is always defined;
+# either header providing std::tr1::unordered_map is sufficient.
+set (TR1_UNORDERED_MAP_FOUND FALSE)
+if (TR1_UNORDERED_MAP_USE_TR1_UNORDERED_MAP OR TR1_UNORDERED_MAP_USE_UNORDERED_MAP)
+ set (TR1_UNORDERED_MAP_FOUND TRUE)
+endif ()
+
+mark_as_advanced (TR1_UNORDERED_MAP_FOUND)
+mark_as_advanced (TR1_UNORDERED_MAP_USE_TR1_UNORDERED_MAP)
+mark_as_advanced (TR1_UNORDERED_MAP_USE_UNORDERED_MAP)
+
diff --git a/mgizapp/cmake/SRLBoost.cmake b/mgizapp/cmake/SRLBoost.cmake
new file mode 100644
index 0000000..af3a956
--- /dev/null
+++ b/mgizapp/cmake/SRLBoost.cmake
@@ -0,0 +1,21 @@
+# GET_BOOST_INCLUDE_PATH(path libs): copy the Boost_* lookup results into caller variables.
+# path - name of the variable that receives ${Boost_INCLUDE_DIRS}
+# libs - name of the variable that receives ${Boost_LIBRARIES}
+# Also registers ${Boost_LIBRARY_DIRS} as a link directory; aborts when Boost_FOUND is false.
+# Windows users: make sure BOOST_ROOT and BOOST_PATH are set correctly in your environment.
+# See the site FAQ for more details.
+# todo: fall back on a locally distributed copy of Boost; limit the Boost version;
+# use COMPONENTS threads to locate boost_threads without breaking the current support.
+macro(GET_BOOST_INCLUDE_PATH path libs)
+ if(NOT Boost_FOUND)
+ message(FATAL_ERROR "Boost not found, please set the BOOST_ROOT environment variable " )
+ endif()
+ if(NOT _boost_IN_CACHE)
+ message( "Boost found" )
+ message(STATUS "Boost_INCLUDE_DIR : ${Boost_INCLUDE_DIR}")
+ endif()
+ set(${path} ${Boost_INCLUDE_DIRS})
+ set(${libs} ${Boost_LIBRARIES})
+ link_directories(${Boost_LIBRARY_DIRS})
+endmacro()
+
diff --git a/mgizapp/scripts/CMakeLists.txt b/mgizapp/scripts/CMakeLists.txt
new file mode 100644
index 0000000..afa694e
--- /dev/null
+++ b/mgizapp/scripts/CMakeLists.txt
@@ -0,0 +1,4 @@
+# Install the helper scripts into <prefix>/scripts with owner-only read/write/execute permissions.
+install(PROGRAMS force-align-moses.sh giza2bal.pl merge_alignment.py plain2snt-hasvcb.py sntpostproc.py force-align-moses-old.sh run.sh
+ DESTINATION scripts
+ PERMISSIONS OWNER_EXECUTE OWNER_READ OWNER_WRITE)
diff --git a/mgizapp/scripts/force-align-moses-old.sh b/mgizapp/scripts/force-align-moses-old.sh
new file mode 100755
index 0000000..fd4cf12
--- /dev/null
+++ b/mgizapp/scripts/force-align-moses-old.sh
@@ -0,0 +1,48 @@
+#!/usr/bin/env bash
+# Force-alignment driver for mgiza; see the usage text below for the expected arguments.
+MGIZA=${QMT_HOME}/bin/mgiza
+# Four arguments are required; otherwise print the usage to stderr and fail.
+if [ $# -lt 4 ]; then
+ echo "OK, this is simple, put me into your Moses training directory, link your source/target corpus" 1>&2
+ echo "and run " $0 " PREFIX src_tag tgt_tag root-dir." 1>&2
+ echo "and get force-aligned data: root-dir/giza.[src-tgt|tgt-src]/*.A3.final.* " 1>&2
+ echo "make sure I can find PREFIX.src_tag-tgt_tag and PREFIX.tgt_tag-src_tag, and \${QMT_HOME} is set" 1>&2
+ exit 1
+fi
+
+PRE=$1
+SRC=$2
+TGT=$3
+ROOT=$4
+
+mkdir -p $ROOT/giza.${SRC}-${TGT}
+mkdir -p $ROOT/giza.${TGT}-${SRC}
+mkdir -p $ROOT/corpus
+
+echo "Generating corpus file " 1>&2
+
+${QMT_HOME}/scripts/plain2snt-hasvcb.py corpus/$SRC.vcb corpus/$TGT.vcb ${PRE}.${SRC} ${PRE}.${TGT} $ROOT/corpus/${TGT}-${SRC}.snt $ROOT/corpus/${SRC}-${TGT}.snt $ROOT/corpus/$SRC.vcb $ROOT/corpus/$TGT.vcb
+
+ln -sf $PWD/corpus/$SRC.vcb.classes $PWD/corpus/$TGT.vcb.classes $ROOT/corpus/
+
+echo "Generating co-occurrence file " 1>&2
+
+${QMT_HOME}/bin/snt2cooc $ROOT/giza.${TGT}-${SRC}/$TGT-${SRC}.cooc $ROOT/corpus/$SRC.vcb $ROOT/corpus/$TGT.vcb $ROOT/corpus/${TGT}-${SRC}.snt
+${QMT_HOME}/bin//snt2cooc $ROOT/giza.${SRC}-${TGT}/$SRC-${TGT}.cooc $ROOT/corpus/$TGT.vcb $ROOT/corpus/$SRC.vcb $ROOT/corpus/${SRC}-${TGT}.snt
+
+echo "Running force alignment " 1>&2
+
+$MGIZA giza.$TGT-$SRC/$TGT-$SRC.gizacfg -c $ROOT/corpus/$TGT-$SRC.snt -o $ROOT/giza.${TGT}-${SRC}/$TGT-${SRC} \
+-s $ROOT/corpus/$SRC.vcb -t $ROOT/corpus/$TGT.vcb -m1 0 -m2 0 -mh 0 -coocurrence $ROOT/giza.${TGT}-${SRC}/$TGT-${SRC}.cooc \
+-restart 11 -previoust giza.$TGT-$SRC/$TGT-$SRC.t3.final \
+-previousa giza.$TGT-$SRC/$TGT-$SRC.a3.final -previousd giza.$TGT-$SRC/$TGT-$SRC.d3.final \
+-previousn giza.$TGT-$SRC/$TGT-$SRC.n3.final -previousd4 giza.$TGT-$SRC/$TGT-$SRC.d4.final \
+-previousd42 giza.$TGT-$SRC/$TGT-$SRC.D4.final -m3 0 -m4 1
+
+$MGIZA giza.$SRC-$TGT/$SRC-$TGT.gizacfg -c $ROOT/corpus/$SRC-$TGT.snt -o $ROOT/giza.${SRC}-${TGT}/$SRC-${TGT} \
+-s $ROOT/corpus/$TGT.vcb -t $ROOT/corpus/$SRC.vcb -m1 0 -m2 0 -mh 0 -coocurrence $ROOT/giza.${SRC}-${TGT}/$SRC-${TGT}.cooc \
+-restart 11 -previoust giza.$SRC-$TGT/$SRC-$TGT.t3.final \
+-previousa giza.$SRC-$TGT/$SRC-$TGT.a3.final -previousd giza.$SRC-$TGT/$SRC-$TGT.d3.final \
+-previousn giza.$SRC-$TGT/$SRC-$TGT.n3.final -previousd4 giza.$SRC-$TGT/$SRC-$TGT.d4.final \
+-previousd42 giza.$SRC-$TGT/$SRC-$TGT.D4.final -m3 0 -m4 1
+
diff --git a/mgizapp/scripts/force-align-moses.sh b/mgizapp/scripts/force-align-moses.sh
index fd4cf12..ac95bcb 100755
--- a/mgizapp/scripts/force-align-moses.sh
+++ b/mgizapp/scripts/force-align-moses.sh
@@ -14,33 +14,34 @@ PRE=$1
SRC=$2
TGT=$3
ROOT=$4
+NUM=$5
-mkdir -p $ROOT/giza.${SRC}-${TGT}
-mkdir -p $ROOT/giza.${TGT}-${SRC}
-mkdir -p $ROOT/corpus
+mkdir -p $ROOT/giza-inverse.${NUM}
+mkdir -p $ROOT/giza.${NUM}
+mkdir -p $ROOT/prepared.${NUM}
echo "Generating corpus file " 1>&2
-${QMT_HOME}/scripts/plain2snt-hasvcb.py corpus/$SRC.vcb corpus/$TGT.vcb ${PRE}.${SRC} ${PRE}.${TGT} $ROOT/corpus/${TGT}-${SRC}.snt $ROOT/corpus/${SRC}-${TGT}.snt $ROOT/corpus/$SRC.vcb $ROOT/corpus/$TGT.vcb
+${QMT_HOME}/scripts/plain2snt-hasvcb.py prepared.${NUM}/$SRC.vcb prepared.${NUM}/$TGT.vcb ${PRE}.${SRC} ${PRE}.${TGT} $ROOT/prepared.${NUM}/${TGT}-${SRC}.snt $ROOT/prepared.${NUM}/${SRC}-${TGT}.snt $ROOT/prepared.${NUM}/$SRC.vcb $ROOT/prepared.${NUM}/$TGT.vcb
-ln -sf $PWD/corpus/$SRC.vcb.classes $PWD/corpus/$TGT.vcb.classes $ROOT/corpus/
+ln -sf $PWD/prepared.${NUM}/$SRC.vcb.classes $PWD/prepared.${NUM}/$TGT.vcb.classes $ROOT/prepared.${NUM}/
echo "Generating co-occurrence file " 1>&2
-${QMT_HOME}/bin/snt2cooc $ROOT/giza.${TGT}-${SRC}/$TGT-${SRC}.cooc $ROOT/corpus/$SRC.vcb $ROOT/corpus/$TGT.vcb $ROOT/corpus/${TGT}-${SRC}.snt
-${QMT_HOME}/bin//snt2cooc $ROOT/giza.${SRC}-${TGT}/$SRC-${TGT}.cooc $ROOT/corpus/$TGT.vcb $ROOT/corpus/$SRC.vcb $ROOT/corpus/${SRC}-${TGT}.snt
+${QMT_HOME}/bin/snt2cooc $ROOT/giza.${NUM}/$TGT-${SRC}.cooc $ROOT/prepared.${NUM}/$SRC.vcb $ROOT/prepared.${NUM}/$TGT.vcb $ROOT/prepared.${NUM}/${TGT}-${SRC}.snt
+${QMT_HOME}/bin//snt2cooc $ROOT/giza-inverse.${NUM}/$SRC-${TGT}.cooc $ROOT/prepared.${NUM}/$TGT.vcb $ROOT/prepared.${NUM}/$SRC.vcb $ROOT/prepared.${NUM}/${SRC}-${TGT}.snt
echo "Running force alignment " 1>&2
-$MGIZA giza.$TGT-$SRC/$TGT-$SRC.gizacfg -c $ROOT/corpus/$TGT-$SRC.snt -o $ROOT/giza.${TGT}-${SRC}/$TGT-${SRC} \
--s $ROOT/corpus/$SRC.vcb -t $ROOT/corpus/$TGT.vcb -m1 0 -m2 0 -mh 0 -coocurrence $ROOT/giza.${TGT}-${SRC}/$TGT-${SRC}.cooc \
+$MGIZA giza.$TGT-$SRC/$TGT-$SRC.gizacfg -c $ROOT/prepared.${NUM}/$TGT-$SRC.snt -o $ROOT/giza.${NUM}/$TGT-${SRC} \
+-s $ROOT/prepared.${NUM}/$SRC.vcb -t $ROOT/prepared.${NUM}/$TGT.vcb -m1 0 -m2 0 -mh 0 -coocurrence $ROOT/giza.${NUM}/$TGT-${SRC}.cooc \
-restart 11 -previoust giza.$TGT-$SRC/$TGT-$SRC.t3.final \
-previousa giza.$TGT-$SRC/$TGT-$SRC.a3.final -previousd giza.$TGT-$SRC/$TGT-$SRC.d3.final \
-previousn giza.$TGT-$SRC/$TGT-$SRC.n3.final -previousd4 giza.$TGT-$SRC/$TGT-$SRC.d4.final \
-previousd42 giza.$TGT-$SRC/$TGT-$SRC.D4.final -m3 0 -m4 1
-$MGIZA giza.$SRC-$TGT/$SRC-$TGT.gizacfg -c $ROOT/corpus/$SRC-$TGT.snt -o $ROOT/giza.${SRC}-${TGT}/$SRC-${TGT} \
--s $ROOT/corpus/$TGT.vcb -t $ROOT/corpus/$SRC.vcb -m1 0 -m2 0 -mh 0 -coocurrence $ROOT/giza.${SRC}-${TGT}/$SRC-${TGT}.cooc \
+$MGIZA giza.$SRC-$TGT/$SRC-$TGT.gizacfg -c $ROOT/prepared.${NUM}/$SRC-$TGT.snt -o $ROOT/giza-inverse.${NUM}/$SRC-${TGT} \
+-s $ROOT/prepared.${NUM}/$TGT.vcb -t $ROOT/prepared.${NUM}/$SRC.vcb -m1 0 -m2 0 -mh 0 -coocurrence $ROOT/giza-inverse.${NUM}/$SRC-${TGT}.cooc \
-restart 11 -previoust giza.$SRC-$TGT/$SRC-$TGT.t3.final \
-previousa giza.$SRC-$TGT/$SRC-$TGT.a3.final -previousd giza.$SRC-$TGT/$SRC-$TGT.d3.final \
-previousn giza.$SRC-$TGT/$SRC-$TGT.n3.final -previousd4 giza.$SRC-$TGT/$SRC-$TGT.d4.final \
diff --git a/mgizapp/scripts/run.sh b/mgizapp/scripts/run.sh
new file mode 100755
index 0000000..2bb3972
--- /dev/null
+++ b/mgizapp/scripts/run.sh
@@ -0,0 +1,23 @@
+PRE=test
+SRC=fr
+TGT=en
+NUM=1
+SCRIPT_DIR=/opt/AO/sw/edinburgh-code/scripts-20110926-1425
+
+export QMT_HOME=/root/workspace/mgizapp
+
+rm -rf out
+
+$QMT_HOME/scripts/force-align-moses.sh $PRE $SRC $TGT out 1
+
+echo "FINISHED forced alignment"
+
+$SCRIPT_DIR/../merge_alignment.py out/giza-inverse.$NUM/$SRC-$TGT.A3.final.part* | gzip -c > out/giza-inverse.$NUM/$SRC-$TGT.A3.final.gz
+$SCRIPT_DIR/../merge_alignment.py out/giza.$NUM/$TGT-$SRC.A3.final.part* | gzip -c > out/giza.$NUM/$TGT-$SRC.A3.final.gz
+
+$SCRIPT_DIR/training/symal/giza2bal.pl -d "gzip -cd out/giza.$NUM/$TGT-$SRC.A3.final.gz" -i "gzip -cd out/giza-inverse.$NUM/$SRC-$TGT.A3.final.gz" | $SCRIPT_DIR/training/symal/symal -alignment="grow" -diagonal="yes" -final="yes" -both="yes" > out/aligned.1.grow-diag-final-and
+
+echo "FINISHED giza2bal & symal"
+
+$SCRIPT_DIR/training/phrase-extract/extract $PRE.$TGT $PRE.$SRC out/aligned.1.grow-diag-final-and out/extract.1 7 orientation --model wbe-msd
+
diff --git a/mgizapp/src/CMakeLists.txt b/mgizapp/src/CMakeLists.txt
new file mode 100644
index 0000000..b4b4f65
--- /dev/null
+++ b/mgizapp/src/CMakeLists.txt
@@ -0,0 +1,105 @@
+
+# Look up the platform thread library; CMAKE_THREAD_LIBS_INIT is linked further down.
+find_package(Threads)
+
+# Set output directories: executables in <build>/bin, libraries in <build>/lib.
+set(EXECUTABLE_OUTPUT_PATH "${PROJECT_BINARY_DIR}/bin")
+set(LIBRARY_OUTPUT_PATH "${PROJECT_BINARY_DIR}/lib")
+
+# Definitions and warning switches for the targets of this directory.
+# Both NDEBUG and DEBUG are defined, in the same order as before.
+add_definitions(
+ -DNDEBUG
+ -DWORDINDEX_WITH_4_BYTE
+ -DBINARY_SEARCH_FOR_TTABLE
+ -DDEBUG -Wno-deprecated -Wno-write-strings)
+
+SET( LIBMGIZA_SRC # sources and headers passed to ADD_LIBRARY(mgiza STATIC ...) below
+ alignment.cpp alignment.h
+ AlignTables.cpp AlignTables.h
+ Array2.h Array4.h
+ Array.h ATables.cpp
+ ATables.h cmd.c
+ cmd.h collCounts.cpp
+ collCounts.h common.h
+ D4Tables.h
+ D5Tables.h defs.h
+ Dictionary.cpp Dictionary.h
+ file_spec.h FlexArray.h
+ ForwardBackward.cpp ForwardBackward.h
+ getSentence.cpp getSentence.h
+ Globals.h hmm.cpp
+ hmm.h
+ HMMTables.cpp HMMTables.h
+ logprob.cpp logprob.h
+ model1.cpp
+ model1.h model2.cpp
+ model2.h model2to3.cpp
+ model345-peg.cpp model3.cpp
+ model3.h model3_viterbi.cpp
+ model3_viterbi_with_tricks.cpp MoveSwapMatrix.cpp
+ MoveSwapMatrix.h myassert.cpp
+ myassert.h mymath.h
+ mystl.h NTables.cpp
+ NTables.h Parameter.cpp
+ Parameter.h parse.cpp
+ Perplexity.cpp Perplexity.h
+ Pointer.h
+ reports.cpp SetArray.cpp
+ SetArray.h
+ syncObj.h transpair_model1.h
+ transpair_model2.h transpair_model3.cpp
+ transpair_model3.h transpair_model4.cpp
+ transpair_model4.h transpair_model5.cpp
+ transpair_model5.h transpair_modelhmm.h
+ ttableDiff.hpp TTables.cpp
+ TTables.h types.h
+ utility.cpp utility.h
+ Vector.h vocab.cpp
+ vocab.h WordClasses.h
+)
+
+# Static library built from LIBMGIZA_SRC.
+add_library(mgiza STATIC ${LIBMGIZA_SRC})
+
+# Header search path: project root and src/; link search path: the library output dir.
+include_directories(
+ ${PROJECT_SOURCE_DIR}
+ ${PROJECT_SOURCE_DIR}/src/)
+link_directories(${LIBRARY_OUTPUT_PATH})
+
+# mgizapp executable: main.cpp linked against mgiza, Boost and the thread library.
+set(MGIZA_SRC main.cpp)
+add_executable(mgizapp ${MGIZA_SRC})
+target_link_libraries(mgizapp
+ mgiza ${Boost_LIBRARIES}
+ ${CMAKE_THREAD_LIBS_INIT})
+
+
+# Tools built from a single source file each.
+add_executable(snt2cooc snt2cooc.cpp)
+add_executable(snt2plain snt2plain.cpp)
+add_executable(plain2snt plain2snt.cpp)
+
+# symal additionally compiles cmd.c.
+add_executable(symal symal.cpp cmd.c)
+
+# hmmnorm and d4norm share one link line: mgiza, Boost and the
+# thread library, in that order.
+add_executable(hmmnorm hmmnorm.cxx)
+add_executable(d4norm d4norm.cxx)
+foreach(norm_tool hmmnorm d4norm)
+ target_link_libraries(${norm_tool}
+ mgiza
+ ${Boost_LIBRARIES}
+ ${CMAKE_THREAD_LIBS_INIT})
+endforeach()
+
+# Install executables to <prefix>/bin and library artifacts to <prefix>/lib.
+install(
+ TARGETS mgiza mgizapp snt2cooc snt2plain plain2snt symal hmmnorm d4norm
+ RUNTIME DESTINATION bin LIBRARY DESTINATION lib
+ ARCHIVE DESTINATION lib)
+
+
+
diff --git a/mgizapp/src/mkcls/CMakeLists.txt b/mgizapp/src/mkcls/CMakeLists.txt
new file mode 100644
index 0000000..3554fb7
--- /dev/null
+++ b/mgizapp/src/mkcls/CMakeLists.txt
@@ -0,0 +1,72 @@
+
+# Look up the platform thread library.
+find_package(Threads)
+
+# Set output directories: executables in <build>/bin, libraries in <build>/lib.
+set(EXECUTABLE_OUTPUT_PATH "${PROJECT_BINARY_DIR}/bin")
+set(LIBRARY_OUTPUT_PATH "${PROJECT_BINARY_DIR}/lib")
+
+# Definitions and warning switches for the targets of this directory.
+add_definitions(
+ -DNDEBUG
+ -Wno-deprecated -Wno-write-strings)
+
+SET( MKCLS_SRC # sources and headers passed to ADD_EXECUTABLE(mkcls ...) below
+ Array.h
+ FixedArray.h
+ FlexArray.h
+ GDAOptimization.cpp
+ GDAOptimization.h
+ general.cpp
+ general.h
+ HCOptimization.cpp
+ HCOptimization.h
+ IterOptimization.cpp
+ IterOptimization.h
+ KategProblem.cpp
+ KategProblem.h
+ KategProblemKBC.cpp
+ KategProblemKBC.h
+ KategProblemTest.cpp
+ KategProblemTest.h
+ KategProblemWBC.cpp
+ KategProblemWBC.h
+ mkcls.cpp
+ my.h
+ myassert.h
+ myleda.h
+ MYOptimization.cpp
+ MYOptimization.h
+ mystl.h
+ Optimization.cpp
+ Optimization.h
+ Problem.cpp
+ Problem.h
+ ProblemTest.cpp
+ ProblemTest.h
+ RRTOptimization.cpp
+ RRTOptimization.h
+ SAOptimization.cpp
+ SAOptimization.h
+ StatVar.cpp
+ StatVar.h
+ TAOptimization.cpp
+ TAOptimization.h
+
+ )
+
+# mkcls executable built from MKCLS_SRC.
+add_executable(mkcls ${MKCLS_SRC})
+
+# Header search path: project root, src/ and src/mkcls.
+include_directories(
+ ${PROJECT_SOURCE_DIR}
+ ${PROJECT_SOURCE_DIR}/src/
+ ${PROJECT_SOURCE_DIR}/src/mkcls)
+link_directories(${LIBRARY_OUTPUT_PATH})
+
+# Install mkcls to <prefix>/bin (lib/ for any library artifacts).
+install(TARGETS mkcls RUNTIME DESTINATION bin LIBRARY DESTINATION lib ARCHIVE DESTINATION lib)
+
+
+