Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/marian-nmt/sentencepiece.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
author: Taku Kudo <taku@google.com>, 2018-07-24 09:22:49 +0300
committer: Taku Kudo <taku@google.com>, 2018-07-24 09:22:49 +0300
commit: 89831f80c125335a56807a1c738f2f509d03d6d5 (patch)
tree: 6d7dbfcaf2f195ae2d24017f6eee3049e2d7749f /src
parent: d64cc9ada66c4a601536b94e88132937f8768e9c (diff)
Switched to cmake
Diffstat (limited to 'src')
-rw-r--r-- src/CMakeLists.txt 231
-rw-r--r-- src/Makefile.am 108
-rw-r--r-- src/bpe_model_trainer_test.cc 18
-rw-r--r-- src/builder.cc 5
-rw-r--r-- src/builder_test.cc 5
-rw-r--r-- src/common.h 2
-rw-r--r-- src/flags.cc 5
-rw-r--r-- src/sentencepiece_trainer_test.cc 35
-rw-r--r-- src/test_main.cc 6
-rw-r--r-- src/unigram_model_trainer_test.cc 7
10 files changed, 283 insertions, 139 deletions
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
new file mode 100644
index 0000000..59206d2
--- /dev/null
+++ b/src/CMakeLists.txt
@@ -0,0 +1,231 @@
+# Copyright 2018 Google Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.!
+
+find_package(Protobuf REQUIRED)
+include_directories(${Protobuf_INCLUDE_DIRS})
+protobuf_generate_cpp(SPM_PROTO_SRCS SPM_PROTO_HDRS sentencepiece.proto)
+protobuf_generate_cpp(SPM_MODEL_PROTO_SRCS SPM_MODEL_PROTO_HDRS sentencepiece_model.proto)
+
+include_directories(${CMAKE_CURRENT_BINARY_DIR})
+include_directories(${PROTOBUF_INCLUDE_DIR})
+
+set(SPM_SRCS
+ ${SPM_PROTO_HDRS}
+ ${SPM_PROTO_SRCS}
+ ${SPM_MODEL_PROTO_HDRS}
+ ${SPM_MODEL_PROTO_SRCS}
+ bpe_model.h
+ common.h
+ normalizer.h
+ util.h
+ flags.h
+ sentencepiece_processor.h
+ word_model.h
+ model_factory.h
+ char_model.h
+ model_interface.h
+ testharness.h
+ unigram_model.h
+ bpe_model.cc
+ char_model.cc
+ error.cc
+ flags.cc
+ model_factory.cc
+ model_interface.cc
+ normalizer.cc
+ sentencepiece_processor.cc
+ unigram_model.cc
+ util.cc
+ word_model.cc
+ ../third_party/absl/strings/string_view.cc)
+
+set(SPM_TRAIN_SRCS
+ ${SPM_PROTO_HDRS}
+ ${SPM_MODEL_PROTO_HDRS}
+ builder.h
+ normalization_rule.h
+ unicode_script.h
+ unicode_script_map.h
+ trainer_factory.h
+ trainer_interface.h
+ unigram_model_trainer.h
+ word_model_trainer.h
+ char_model_trainer.h
+ bpe_model_trainer.h
+ sentencepiece_trainer.h
+ builder.cc
+ unicode_script.cc
+ trainer_factory.cc
+ trainer_interface.cc
+ unigram_model_trainer.cc
+ word_model_trainer.cc
+ char_model_trainer.cc
+ bpe_model_trainer.cc
+ sentencepiece_trainer.cc)
+
+set(SPM_TEST_SRCS
+ ${SPM_PROTO_HDRS}
+ ${SPM_MODEL_PROTO_HDRS}
+ testharness.h
+ bpe_model_test.cc
+ bpe_model_trainer_test.cc
+ builder_test.cc
+ char_model_test.cc
+ char_model_trainer_test.cc
+ flags_test.cc
+ model_factory_test.cc
+ model_interface_test.cc
+ normalizer_test.cc
+ sentencepiece_processor_test.cc
+ sentencepiece_trainer_test.cc
+ test_main.cc
+ testharness.cc
+ trainer_factory_test.cc
+ trainer_interface_test.cc
+ unicode_script_test.cc
+ unigram_model_test.cc
+ unigram_model_trainer_test.cc
+ util_test.cc
+ word_model_test.cc
+ word_model_trainer_test.cc)
+
+find_package(Threads REQUIRED)
+
+set(SPM_LIBS ${PROTOBUF_LIBRARY} Threads::Threads)
+
+if (SPM_ENABLE_NFKC_COMPILE)
+ find_package(ICU 4.4 COMPONENTS i18n data uc REQUIRED)
+ include_directories(${ICU_INCLUDE_DIRS})
+ add_definitions(-DENABLE_NFKC_COMPILE)
+ list(APPEND SPM_LIBS ICU::i18n ICU::data ICU::uc)
+endif()
+
+if (SPM_ENABLE_TCMALLOC)
+ if (SPM_TCMALLOC_STATIC)
+ find_library(TCMALLOC_LIB NAMES libtcmalloc_minimal.a)
+ else()
+ find_library(TCMALLOC_LIB NAMES tcmalloc_minimal)
+ endif()
+ if (TCMALLOC_LIB)
+ list(APPEND SPM_LIBS ${TCMALLOC_LIB})
+ endif()
+endif()
+
+if (SPM_ENABLE_SHARED)
+ add_library(sentencepiece SHARED ${SPM_SRCS})
+ add_library(sentencepiece_train SHARED ${SPM_TRAIN_SRCS})
+endif()
+
+add_library(sentencepiece-static STATIC ${SPM_SRCS})
+add_library(sentencepiece_train-static STATIC ${SPM_TRAIN_SRCS})
+
+target_link_libraries(sentencepiece-static INTERFACE ${SPM_LIBS})
+target_link_libraries(sentencepiece_train-static INTERFACE sentencepiece-static ${SPM_LIBS})
+
+if (SPM_ENABLE_SHARED)
+ target_link_libraries(sentencepiece ${SPM_LIBS})
+ target_link_libraries(sentencepiece_train ${SPM_LIBS} sentencepiece)
+ set(SPM_INSTALLTARGETS sentencepiece sentencepiece_train sentencepiece-static sentencepiece_train-static)
+ set_target_properties(sentencepiece sentencepiece_train PROPERTIES SOVERSION 0 VERSION 0.1.0)
+ if (MSVC)
+ set_target_properties(sentencepiece PROPERTIES IMPORT_SUFFIX "_import.lib")
+ set_target_properties(sentencepiece_train PROPERTIES IMPORT_SUFFIX "_import.lib")
+ elseif (MINGW)
+ set_target_properties(sentencepiece PROPERTIES IMPORT_SUFFIX ".dll.a")
+ set_target_properties(sentencepiece_train PROPERTIES IMPORT_SUFFIX ".dll.a")
+ endif()
+else()
+ add_library(sentencepiece ALIAS sentencepiece-static)
+ add_library(sentencepiece_train ALIAS sentencepiece_train-static)
+ set(SPM_INSTALLTARGETS sentencepiece-static sentencepiece_train-static)
+endif()
+
+set_target_properties(sentencepiece-static PROPERTIES OUTPUT_NAME "sentencepiece")
+set_target_properties(sentencepiece_train-static PROPERTIES OUTPUT_NAME "sentencepiece_train")
+
+if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang" OR "${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU")
+ if (SPM_COVERAGE)
+ set(CMAKE_CXX_FLAGS "-O0 -Wall -fPIC -coverage ${CMAKE_CXX_FLAGS}")
+ else()
+ set(CMAKE_CXX_FLAGS "-O3 -Wall -fPIC ${CMAKE_CXX_FLAGS}")
+ endif()
+ if (SPM_ENABLE_TENSORFLOW_SHARED)
+ add_definitions(-D_GLIBCXX_USE_CXX11_ABI=0)
+ endif()
+ set_source_files_properties(
+ sentencepiece.pb.cc sentencepiece_model.pb.cc
+ PROPERTIES COMPILE_FLAGS "-Wno-misleading-indentation")
+ set_source_files_properties(${SPM_TEST_SRCS}
+ PROPERTIES COMPILE_FLAGS "-Wno-sign-compare")
+ if (SPM_ENABLE_SHARED)
+ set_property(TARGET sentencepiece APPEND_STRING PROPERTY COMPILE_FLAGS " -DPIC")
+ set_property(TARGET sentencepiece_train APPEND_STRING PROPERTY COMPILE_FLAGS " -DPIC")
+ endif()
+endif()
+
+add_executable(spm_encode spm_encode_main.cc)
+add_executable(spm_decode spm_decode_main.cc)
+add_executable(spm_normalize spm_normalize_main.cc)
+add_executable(spm_train spm_train_main.cc)
+add_executable(spm_export_vocab spm_export_vocab_main.cc)
+
+target_link_libraries(spm_encode sentencepiece)
+target_link_libraries(spm_decode sentencepiece)
+target_link_libraries(spm_normalize sentencepiece sentencepiece_train)
+target_link_libraries(spm_train sentencepiece sentencepiece_train)
+target_link_libraries(spm_export_vocab sentencepiece)
+
+if (SPM_ENABLE_NFKC_COMPILE)
+ add_executable(compile_charsmap compile_charsmap_main.cc)
+ target_link_libraries(compile_charsmap sentencepiece)
+endif()
+
+list(APPEND SPM_INSTALLTARGETS
+ spm_encode spm_decode spm_normalize spm_train spm_export_vocab)
+
+install(TARGETS ${SPM_INSTALLTARGETS}
+ RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
+ LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
+ ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR})
+install(FILES sentencepiece_trainer.h sentencepiece_processor.h
+ DESTINATION ${CMAKE_INSTALL_INCDIR})
+
+if (SPM_BUILD_TEST OR SPM_COVERAGE)
+ enable_testing()
+ add_executable(spm_test test_main.cc ${SPM_TEST_SRCS})
+
+ if (SPM_COVERAGE)
+ target_link_libraries(spm_test sentencepiece sentencepiece_train "-lgcov")
+ else()
+ target_link_libraries(spm_test sentencepiece sentencepiece_train)
+ endif()
+
+ set(MEMORYCHECK_COMMAND_OPTIONS "--leak-check=full --show-leak-kinds=definite,possible --error-exitcode=1")
+ find_program(CTEST_MEMORYCHECK_COMMAND NAMES valgrind)
+ include(Dart)
+
+ add_test(NAME sentencepiece_test
+ COMMAND $<TARGET_FILE:spm_test> --data_dir=${PROJECT_SOURCE_DIR}/data)
+endif()
+
+if (SPM_COVERAGE)
+ add_custom_target(coverage
+ COMMAND mkdir -p coverage
+ COMMAND $<TARGET_FILE:spm_test> --data_dir=${PROJECT_SOURCE_DIR}/data
+ COMMAND lcov -c -d . -o coverage.info
+ COMMAND lcov --remove coverage.info "include*" "/c++" "_test*" "testharness*" "third_party*" ".pb.*" -o coverage.info
+ COMMAND mkdir -p lcov_html
+ COMMAND genhtml -o lcov_html coverage.info)
+ add_dependencies(coverage spm_test)
+endif()
diff --git a/src/Makefile.am b/src/Makefile.am
deleted file mode 100644
index d815a59..0000000
--- a/src/Makefile.am
+++ /dev/null
@@ -1,108 +0,0 @@
-lib_LTLIBRARIES = libsentencepiece.la libsentencepiece_train.la
-
-AM_CXXFLAS = -I($srcdir)
-AUTOMAKE_OPTIONS = subdir-objects
-
-libsentencepiece_la_SOURCES = \
- error.cc \
- flags.cc \
- sentencepiece_processor.cc \
- util.cc \
- normalizer.cc \
- unicode_script_map.h util.h \
- common.h \
- flags.h normalizer.h sentencepiece_processor.h \
- model_factory.h model_factory.cc \
- model_interface.h model_interface.cc \
- unigram_model.h unigram_model.cc \
- word_model.h word_model.cc \
- char_model.h char_model.cc \
- bpe_model.h bpe_model.cc \
- ../third_party/absl/strings/string_view.cc
-include_HEADERS = sentencepiece_processor.h sentencepiece_trainer.h
-
-# noinst_LIBRARIES = libsentencepiecetrain.a
-libsentencepiece_train_la_SOURCES = builder.cc builder.h \
- normalization_rule.h \
- unicode_script.h unicode_script.cc \
- trainer_factory.h trainer_factory.cc \
- trainer_interface.h trainer_interface.cc \
- unigram_model_trainer.h unigram_model_trainer.cc \
- word_model_trainer.h word_model_trainer.cc \
- char_model_trainer.h char_model_trainer.cc \
- bpe_model_trainer.h bpe_model_trainer.cc \
- sentencepiece_trainer.h sentencepiece_trainer.cc
-
-nodist_libsentencepiece_la_SOURCES = \
- sentencepiece.pb.cc sentencepiece.pb.h \
- sentencepiece_model.pb.cc sentencepiece_model.pb.h
-
-BUILT_SOURCES = \
- sentencepiece.pb.cc \
- sentencepiece_model.pb.cc
-
-EXTRA_DIST = sentencepiece.proto sentencepiece_model.proto
-
-bin_PROGRAMS = spm_encode spm_decode spm_normalize spm_train spm_export_vocab
-noinst_PROGRAMS = compile_charsmap
-
-spm_encode_SOURCES = spm_encode_main.cc
-spm_encode_LDADD = libsentencepiece.la
-
-spm_decode_SOURCES = spm_decode_main.cc
-spm_decode_LDADD = libsentencepiece.la
-
-spm_normalize_SOURCES = spm_normalize_main.cc
-spm_normalize_LDADD = libsentencepiece_train.la libsentencepiece.la
-
-spm_export_vocab_SOURCES = spm_export_vocab_main.cc
-spm_export_vocab_LDADD = libsentencepiece.la
-
-spm_train_SOURCES = spm_train_main.cc
-spm_train_LDADD = libsentencepiece_train.la libsentencepiece.la
-
-compile_charsmap_SOURCES = compile_charsmap_main.cc
-compile_charsmap_LDADD = libsentencepiece_train.la libsentencepiece.la
-
-
-check_PROGRAMS = spm_test
-TESTS = spm_test
-spm_test_SOURCES = testharness.h \
- builder_test.cc \
- flags_test.cc \
- normalizer_test.cc \
- sentencepiece_processor_test.cc \
- sentencepiece_trainer_test.cc \
- unicode_script_test.cc \
- model_interface_test.cc \
- model_factory_test.cc \
- trainer_interface_test.cc \
- trainer_factory_test.cc \
- word_model_test.cc \
- word_model_trainer_test.cc \
- bpe_model_test.cc \
- bpe_model_trainer_test.cc \
- char_model_test.cc \
- char_model_trainer_test.cc \
- unigram_model_test.cc\
- unigram_model_trainer_test.cc \
- util_test.cc \
- test_main.cc \
- testharness.cc
-
-spm_test_LDADD = libsentencepiece_train.la libsentencepiece.la
-
-CLEANFILES = *.pb.cc *.pb.h *.pb.h *.gcda *.gcno *.info
-clean-local:
- -rm -rf lcov_html
-
-%.pb.cc %.pb.h: %.proto
- $(PROTOC) --cpp_out=$(srcdir) $<
-
-coverage:
- make clean
- make -j10 CXXFLAGS+="-O0 -Wall -std=c++11 -coverage" LIBS+="-lgcov -lprotobuf" check
- lcov -c -d . -o coverage.info
- lcov --remove coverage.info "include*" "/c++" "_test*" "testharness*" "third_party*" ".pb.*" -o coverage.info
- mkdir -p lcov_html
- genhtml -o lcov_html coverage.info
diff --git a/src/bpe_model_trainer_test.cc b/src/bpe_model_trainer_test.cc
index 71d49ba..01e3864 100644
--- a/src/bpe_model_trainer_test.cc
+++ b/src/bpe_model_trainer_test.cc
@@ -16,11 +16,14 @@
#include <string>
#include <vector>
+#include "flags.h"
#include "sentencepiece_processor.h"
#include "sentencepiece_trainer.h"
#include "testharness.h"
#include "util.h"
+DECLARE_string(data_dir);
+
namespace sentencepiece {
namespace bpe {
namespace {
@@ -87,13 +90,14 @@ TEST(BPETrainerTest, BasicTest) {
TEST(BPETrainerTest, EndToEndTest) {
const test::ScopedTempFile sf("tmp_model");
- EXPECT_OK(SentencePieceTrainer::Train(
- std::string("--model_prefix=") + sf.filename() +
- " --input=../data/wagahaiwa_nekodearu.txt"
- " --vocab_size=8000"
- " --normalization_rule_name=identity"
- " --model_type=bpe"
- " --control_symbols=<ctrl>"));
+ EXPECT_OK(SentencePieceTrainer::Train(std::string("--model_prefix=") +
+ sf.filename() +
+ " --input=" + FLAGS_data_dir +
+ "/wagahaiwa_nekodearu.txt"
+ " --vocab_size=8000"
+ " --normalization_rule_name=identity"
+ " --model_type=bpe"
+ " --control_symbols=<ctrl>"));
SentencePieceProcessor sp;
EXPECT_OK(sp.Load(std::string(sf.filename()) + ".model"));
diff --git a/src/builder.cc b/src/builder.cc
index be5e45d..e42503d 100644
--- a/src/builder.cc
+++ b/src/builder.cc
@@ -17,6 +17,8 @@
#include <functional>
#include <utility>
+#include "config.h"
+
#ifdef ENABLE_NFKC_COMPILE
#include <unicode/errorcode.h>
#include <unicode/locid.h>
@@ -326,7 +328,8 @@ util::Status Builder::BuildNFKCMap(CharsMap *chars_map) {
if (nfkc == nfkd) {
continue;
}
- // Expand all possible sequences which are normalized into the same `nfkd`.
+ // Expand all possible sequences which are normalized into the same
+ // `nfkd`.
for (const auto &nfkd_orig : ExpandUnnormalized(nfkd, norm2orig)) {
if (nfkd_orig != nfkc) {
nfkc_map[nfkd_orig] = nfkc;
diff --git a/src/builder_test.cc b/src/builder_test.cc
index 212d3d1..a3af444 100644
--- a/src/builder_test.cc
+++ b/src/builder_test.cc
@@ -14,11 +14,14 @@
#include "builder.h"
#include "common.h"
+#include "flags.h"
#include "normalizer.h"
#include "sentencepiece_trainer.h"
#include "testharness.h"
#include "util.h"
+DECLARE_string(data_dir);
+
namespace sentencepiece {
namespace normalizer {
@@ -135,7 +138,7 @@ TEST(BuilderTest, CompileCharsMap) {
TEST(BuilderTest, LoadCharsMapTest) {
Builder::CharsMap chars_map;
- EXPECT_OK(Builder::LoadCharsMap("../data/nfkc.tsv", &chars_map));
+ EXPECT_OK(Builder::LoadCharsMap(FLAGS_data_dir + "/nfkc.tsv", &chars_map));
std::string precompiled, expected;
EXPECT_OK(Builder::CompileCharsMap(chars_map, &precompiled));
diff --git a/src/common.h b/src/common.h
index 4516be9..7e75bda 100644
--- a/src/common.h
+++ b/src/common.h
@@ -24,9 +24,7 @@
#include <utility>
#include <vector>
-#ifdef HAVE_CONFIG_H
#include "config.h"
-#endif
#if defined(_WIN32) && !defined(__CYGWIN__)
#define OS_WIN
diff --git a/src/flags.cc b/src/flags.cc
index c33e035..830c2cb 100644
--- a/src/flags.cc
+++ b/src/flags.cc
@@ -23,11 +23,8 @@
#include <utility>
#include "common.h"
-#include "util.h"
-
-#ifdef HAVE_CONFIG_H
#include "config.h"
-#endif
+#include "util.h"
namespace sentencepiece {
namespace flags {
diff --git a/src/sentencepiece_trainer_test.cc b/src/sentencepiece_trainer_test.cc
index 0c2107d..ead34c1 100644
--- a/src/sentencepiece_trainer_test.cc
+++ b/src/sentencepiece_trainer_test.cc
@@ -13,31 +13,36 @@
// limitations under the License.!
#include "sentencepiece_trainer.h"
+#include "flags.h"
#include "sentencepiece_model.pb.h"
#include "testharness.h"
#include "util.h"
+DECLARE_string(data_dir);
+
namespace sentencepiece {
namespace {
TEST(SentencePieceTrainerTest, TrainFromArgsTest) {
- SentencePieceTrainer::Train(
- "--input=../data/botchan.txt --model_prefix=m --vocab_size=1000");
- SentencePieceTrainer::Train(
- "--input=../data/botchan.txt --model_prefix=m --vocab_size=1000 "
- "--model_type=bpe");
- SentencePieceTrainer::Train(
- "--input=../data/botchan.txt --model_prefix=m --vocab_size=1000 "
- "--model_type=char");
- SentencePieceTrainer::Train(
- "--input=../data/botchan.txt --model_prefix=m --vocab_size=1000 "
- "--model_type=word");
+ std::string input = FLAGS_data_dir + "/botchan.txt";
+ SentencePieceTrainer::Train(std::string("--input=") + input +
+ " --model_prefix=m --vocab_size=1000");
+ SentencePieceTrainer::Train(std::string("--input=") + input +
+ " --model_prefix=m --vocab_size=1000 "
+ "--model_type=bpe");
+ SentencePieceTrainer::Train(std::string("--input=") + input +
+ " --model_prefix=m --vocab_size=1000 "
+ "--model_type=char");
+ SentencePieceTrainer::Train(std::string("--input=") + input +
+ " --model_prefix=m --vocab_size=1000 "
+ "--model_type=word");
}
TEST(SentencePieceTrainerTest, TrainWithCustomNormalizationRule) {
- SentencePieceTrainer::Train(
- "--input=../data/botchan.txt --model_prefix=m --vocab_size=1000 "
- "--normalization_rule_tsv=../data/nfkc.tsv");
+ SentencePieceTrainer::Train("--input=" + FLAGS_data_dir +
+ "/botchan.txt --model_prefix=m --vocab_size=1000 "
+ "--normalization_rule_tsv=" +
+ FLAGS_data_dir + "/nfkc.tsv");
}
TEST(SentencePieceTrainerTest, TrainErrorTest) {
@@ -50,7 +55,7 @@ TEST(SentencePieceTrainerTest, TrainErrorTest) {
TEST(SentencePieceTrainerTest, TrainTest) {
TrainerSpec trainer_spec;
- trainer_spec.add_input("../data/botchan.txt");
+ trainer_spec.add_input(FLAGS_data_dir + "/botchan.txt");
trainer_spec.set_model_prefix("m");
trainer_spec.set_vocab_size(1000);
NormalizerSpec normalizer_spec;
diff --git a/src/test_main.cc b/src/test_main.cc
index 000d013..9ec2b3f 100644
--- a/src/test_main.cc
+++ b/src/test_main.cc
@@ -12,9 +12,15 @@
// See the License for the specific language governing permissions and
// limitations under the License.!
+#include "flags.h"
#include "testharness.h"
+DEFINE_string(data_dir, "../data", "Data directory");
+
int main(int argc, char **argv) {
+ std::vector<std::string> rest_args;
+ sentencepiece::flags::ParseCommandLineFlags(argc, argv, &rest_args);
+
sentencepiece::test::RunAllTests();
return 0;
}
diff --git a/src/unigram_model_trainer_test.cc b/src/unigram_model_trainer_test.cc
index aa60427..c7164fa 100644
--- a/src/unigram_model_trainer_test.cc
+++ b/src/unigram_model_trainer_test.cc
@@ -13,12 +13,16 @@
// limitations under the License.!
#include "unigram_model_trainer.h"
+
+#include "flags.h"
#include "sentencepiece_model.pb.h"
#include "sentencepiece_processor.h"
#include "sentencepiece_trainer.h"
#include "testharness.h"
#include "util.h"
+DECLARE_string(data_dir);
+
namespace sentencepiece {
namespace unigram {
namespace {
@@ -38,7 +42,8 @@ TEST(UnigramTrainerTest, EndToEndTest) {
EXPECT_OK(SentencePieceTrainer::Train(
std::string("--model_prefix=") + sf.filename() +
- " --input=../data/wagahaiwa_nekodearu.txt"
+ " --input=" + FLAGS_data_dir +
+ "/wagahaiwa_nekodearu.txt"
" --vocab_size=8000"
" --normalization_rule_name=identity"
" --model_type=unigram"