Switched to cmake

author: Taku Kudo <taku@google.com> 2018-07-24 09:22:49 +0300
committer: Taku Kudo <taku@google.com> 2018-07-24 09:22:49 +0300
commit: 89831f80c125335a56807a1c738f2f509d03d6d5 (patch)
tree: 6d7dbfcaf2f195ae2d24017f6eee3049e2d7749f /src
parent: d64cc9ada66c4a601536b94e88132937f8768e9c (diff)
10 files changed, 283 insertions, 139 deletions
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
new file mode 100644
index 0000000..59206d2
--- /dev/null
+++ b/src/CMakeLists.txt
@@ -0,0 +1,231 @@
+# Copyright 2018 Google Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+find_package(Protobuf REQUIRED)  # configuration stops here when Protobuf is missing
+include_directories(${Protobuf_INCLUDE_DIRS})
+protobuf_generate_cpp(SPM_PROTO_SRCS SPM_PROTO_HDRS sentencepiece.proto)  # fills the *_SRCS / *_HDRS variables
+protobuf_generate_cpp(SPM_MODEL_PROTO_SRCS SPM_MODEL_PROTO_HDRS sentencepiece_model.proto)
+
+include_directories(${CMAKE_CURRENT_BINARY_DIR})  # presumably where the generated *.pb.h headers land
+include_directories(${PROTOBUF_INCLUDE_DIR})  # NOTE(review): upper-case spelling, presumably kept for older FindProtobuf versions
+
+set(SPM_SRCS  # inputs of the sentencepiece library targets
+  ${SPM_PROTO_HDRS}
+  ${SPM_PROTO_SRCS}
+  ${SPM_MODEL_PROTO_HDRS}
+  ${SPM_MODEL_PROTO_SRCS}
+  bpe_model.h
+  common.h
+  normalizer.h
+  util.h
+  flags.h
+  sentencepiece_processor.h
+  word_model.h
+  model_factory.h
+  char_model.h
+  model_interface.h
+  testharness.h
+  unigram_model.h
+  bpe_model.cc
+  char_model.cc
+  error.cc
+  flags.cc
+  model_factory.cc
+  model_interface.cc
+  normalizer.cc
+  sentencepiece_processor.cc
+  unigram_model.cc
+  util.cc
+  word_model.cc
+  ../third_party/absl/strings/string_view.cc)
+
+set(SPM_TRAIN_SRCS  # inputs of the sentencepiece_train library targets
+  ${SPM_PROTO_HDRS}  # generated headers only; the matching .pb.cc files are listed in SPM_SRCS
+  ${SPM_MODEL_PROTO_HDRS}
+  builder.h
+  normalization_rule.h
+  unicode_script.h
+  unicode_script_map.h
+  trainer_factory.h
+  trainer_interface.h
+  unigram_model_trainer.h
+  word_model_trainer.h
+  char_model_trainer.h
+  bpe_model_trainer.h
+  sentencepiece_trainer.h
+  builder.cc
+  unicode_script.cc
+  trainer_factory.cc
+  trainer_interface.cc
+  unigram_model_trainer.cc
+  word_model_trainer.cc
+  char_model_trainer.cc
+  bpe_model_trainer.cc
+  sentencepiece_trainer.cc)
+
+set(SPM_TEST_SRCS  # inputs of the spm_test executable
+  ${SPM_PROTO_HDRS}  # generated headers only; the matching .pb.cc files are listed in SPM_SRCS
+  ${SPM_MODEL_PROTO_HDRS}
+  testharness.h
+  bpe_model_test.cc
+  bpe_model_trainer_test.cc
+  builder_test.cc
+  char_model_test.cc
+  char_model_trainer_test.cc
+  flags_test.cc
+  model_factory_test.cc
+  model_interface_test.cc
+  normalizer_test.cc
+  sentencepiece_processor_test.cc
+  sentencepiece_trainer_test.cc
+  test_main.cc
+  testharness.cc
+  trainer_factory_test.cc
+  trainer_interface_test.cc
+  unicode_script_test.cc
+  unigram_model_test.cc
+  unigram_model_trainer_test.cc
+  util_test.cc
+  word_model_test.cc
+  word_model_trainer_test.cc)
+
+find_package(Threads REQUIRED)  # provides the Threads::Threads imported target used below
+
+set(SPM_LIBS ${PROTOBUF_LIBRARY} Threads::Threads)  # link dependencies handed to every library target
+
+if (SPM_ENABLE_NFKC_COMPILE)  # optional ICU-backed build
+  find_package(ICU 4.4 COMPONENTS i18n data uc REQUIRED)
+  include_directories(${ICU_INCLUDE_DIRS})
+  add_definitions(-DENABLE_NFKC_COMPILE)  # enables the code guarded by #ifdef ENABLE_NFKC_COMPILE
+  list(APPEND SPM_LIBS ICU::i18n ICU::data ICU::uc)  # same three components requested from find_package
+endif()
+
+if (SPM_ENABLE_TCMALLOC)
+  set(spm_tcmalloc_name tcmalloc_minimal)
+  if (SPM_TCMALLOC_STATIC)
+    set(spm_tcmalloc_name libtcmalloc_minimal.a)  # ask for the archive by its exact file name
+  endif()
+  find_library(TCMALLOC_LIB NAMES ${spm_tcmalloc_name})
+  if (TCMALLOC_LIB)  # tcmalloc is simply left out when it cannot be found
+    list(APPEND SPM_LIBS ${TCMALLOC_LIB})
+  endif()
+endif()
+
+# The static libraries are always built. OUTPUT_NAME gives them the plain
+# library file name; the "-static" suffix only keeps the target names unique.
+add_library(sentencepiece-static STATIC ${SPM_SRCS})
+add_library(sentencepiece_train-static STATIC ${SPM_TRAIN_SRCS})
+set_target_properties(sentencepiece-static PROPERTIES OUTPUT_NAME "sentencepiece")
+set_target_properties(sentencepiece_train-static PROPERTIES OUTPUT_NAME "sentencepiece_train")
+
+# INTERFACE: the dependencies are not used to build the archives themselves,
+# they are handed on to whatever links against them.
+target_link_libraries(sentencepiece-static INTERFACE ${SPM_LIBS})
+target_link_libraries(sentencepiece_train-static INTERFACE sentencepiece-static ${SPM_LIBS})
+
+set(SPM_INSTALLTARGETS sentencepiece-static sentencepiece_train-static)
+if (NOT SPM_ENABLE_SHARED)
+  # No shared build: the plain target names refer to the static libraries.
+  add_library(sentencepiece ALIAS sentencepiece-static)
+  add_library(sentencepiece_train ALIAS sentencepiece_train-static)
+else()
+  add_library(sentencepiece SHARED ${SPM_SRCS})
+  add_library(sentencepiece_train SHARED ${SPM_TRAIN_SRCS})
+  target_link_libraries(sentencepiece ${SPM_LIBS})
+  target_link_libraries(sentencepiece_train ${SPM_LIBS} sentencepiece)
+  set_target_properties(sentencepiece sentencepiece_train PROPERTIES SOVERSION 0 VERSION 0.1.0)
+  list(INSERT SPM_INSTALLTARGETS 0 sentencepiece sentencepiece_train)
+  # Presumably keeps the import library apart from the same-named static one.
+  if (MSVC)
+    set_target_properties(sentencepiece sentencepiece_train PROPERTIES IMPORT_SUFFIX "_import.lib")
+  elseif (MINGW)
+    set_target_properties(sentencepiece sentencepiece_train PROPERTIES IMPORT_SUFFIX ".dll.a")
+  endif()
+endif()
+
+if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU" OR "${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang")
+  # Project flags are prepended, so flags already in CMAKE_CXX_FLAGS come after them.
+  set(spm_cxx_flags "-O3 -Wall -fPIC")
+  if (SPM_COVERAGE)
+    set(spm_cxx_flags "-O0 -Wall -fPIC -coverage")
+  endif()
+  set(CMAKE_CXX_FLAGS "${spm_cxx_flags} ${CMAKE_CXX_FLAGS}")
+  if (SPM_ENABLE_TENSORFLOW_SHARED)
+    add_definitions(-D_GLIBCXX_USE_CXX11_ABI=0)
+  endif()
+  set_source_files_properties(${SPM_TEST_SRCS}
+    PROPERTIES COMPILE_FLAGS "-Wno-sign-compare")
+  set_source_files_properties(
+    sentencepiece.pb.cc sentencepiece_model.pb.cc
+    PROPERTIES COMPILE_FLAGS "-Wno-misleading-indentation")
+  if (SPM_ENABLE_SHARED)
+    set_property(TARGET sentencepiece sentencepiece_train APPEND_STRING PROPERTY COMPILE_FLAGS " -DPIC")
+  endif()
+endif()
+
+# Command-line tools; spm_normalize and spm_train also need the train library.
+add_executable(spm_encode spm_encode_main.cc)
+target_link_libraries(spm_encode sentencepiece)
+add_executable(spm_decode spm_decode_main.cc)
+target_link_libraries(spm_decode sentencepiece)
+add_executable(spm_normalize spm_normalize_main.cc)
+target_link_libraries(spm_normalize sentencepiece sentencepiece_train)
+add_executable(spm_train spm_train_main.cc)
+target_link_libraries(spm_train sentencepiece sentencepiece_train)
+add_executable(spm_export_vocab spm_export_vocab_main.cc)
+target_link_libraries(spm_export_vocab sentencepiece)
+# Built only with ICU support. Linked against both libraries (builder.cc is part of sentencepiece_train).
+if (SPM_ENABLE_NFKC_COMPILE)
+  add_executable(compile_charsmap compile_charsmap_main.cc)
+  target_link_libraries(compile_charsmap sentencepiece sentencepiece_train)
+endif()
+
+list(APPEND SPM_INSTALLTARGETS  # the command-line tools are installed together with the libraries
+  spm_encode spm_decode spm_normalize spm_train spm_export_vocab)
+
+install(TARGETS ${SPM_INSTALLTARGETS}
+  RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}  # executables (and DLLs on Windows)
+  LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}  # shared libraries
+  ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR})  # static and import libraries
+install(FILES sentencepiece_trainer.h sentencepiece_processor.h  # the two installed public headers
+  DESTINATION ${CMAKE_INSTALL_INCDIR})  # NOTE(review): not a GNUInstallDirs name; presumably set by the parent CMakeLists.txt
+
+if (SPM_BUILD_TEST OR SPM_COVERAGE)
+  enable_testing()
+  # SPM_TEST_SRCS already contains test_main.cc, so it is not listed again here.
+  add_executable(spm_test ${SPM_TEST_SRCS})
+  target_link_libraries(spm_test sentencepiece sentencepiece_train)
+  if (SPM_COVERAGE)
+    target_link_libraries(spm_test "-lgcov")
+  endif()
+
+  # Valgrind settings; presumably picked up by include(Dart) for memcheck runs.
+  set(MEMORYCHECK_COMMAND_OPTIONS "--leak-check=full --show-leak-kinds=definite,possible --error-exitcode=1")
+  find_program(CTEST_MEMORYCHECK_COMMAND NAMES valgrind)
+  include(Dart)
+
+  add_test(NAME sentencepiece_test
+    COMMAND $<TARGET_FILE:spm_test> --data_dir=${PROJECT_SOURCE_DIR}/data)
+endif()
+
+if (SPM_COVERAGE)
+  add_custom_target(coverage  # runs spm_test, then turns the lcov data into an HTML report
+    COMMAND ${CMAKE_COMMAND} -E make_directory coverage lcov_html
+    COMMAND $<TARGET_FILE:spm_test> --data_dir=${PROJECT_SOURCE_DIR}/data
+    COMMAND lcov -c -d . -o coverage.info
+    COMMAND lcov --remove coverage.info "include*" "/c++" "_test*" "testharness*" "third_party*" ".pb.*" -o coverage.info
+    COMMAND genhtml -o lcov_html coverage.info
+    VERBATIM)  # pass the quoted lcov patterns through without platform-dependent escaping
+  add_dependencies(coverage spm_test)
+endif()
diff --git a/src/Makefile.am b/src/Makefile.am
deleted file mode 100644
index d815a59..0000000
--- a/src/Makefile.am
+++ /dev/null
@@ -1,108 +0,0 @@
-lib_LTLIBRARIES = libsentencepiece.la libsentencepiece_train.la
-
-AM_CXXFLAS = -I($srcdir)
-AUTOMAKE_OPTIONS = subdir-objects
-
-libsentencepiece_la_SOURCES = \
-	error.cc \
-	flags.cc \
-	sentencepiece_processor.cc \
-	util.cc \
-	normalizer.cc \
-	unicode_script_map.h util.h \
-	common.h \
-	flags.h  normalizer.h sentencepiece_processor.h  \
-	model_factory.h model_factory.cc \
-	model_interface.h model_interface.cc \
-	unigram_model.h	unigram_model.cc  \
-	word_model.h word_model.cc \
-	char_model.h char_model.cc \
-	bpe_model.h bpe_model.cc \
-	../third_party/absl/strings/string_view.cc
-include_HEADERS = sentencepiece_processor.h sentencepiece_trainer.h
-
-# noinst_LIBRARIES = libsentencepiecetrain.a
-libsentencepiece_train_la_SOURCES = builder.cc builder.h \
-		    normalization_rule.h  \
-	   	    unicode_script.h unicode_script.cc \
-		    trainer_factory.h trainer_factory.cc \
-		    trainer_interface.h trainer_interface.cc \
-                    unigram_model_trainer.h unigram_model_trainer.cc \
-	     	    word_model_trainer.h word_model_trainer.cc \
-		    char_model_trainer.h char_model_trainer.cc \
-	            bpe_model_trainer.h bpe_model_trainer.cc \
-                    sentencepiece_trainer.h sentencepiece_trainer.cc
-
-nodist_libsentencepiece_la_SOURCES = \
-	sentencepiece.pb.cc  sentencepiece.pb.h \
-	sentencepiece_model.pb.cc sentencepiece_model.pb.h
-
-BUILT_SOURCES = \
-	sentencepiece.pb.cc \
-	sentencepiece_model.pb.cc
-
-EXTRA_DIST = sentencepiece.proto sentencepiece_model.proto
-
-bin_PROGRAMS = spm_encode spm_decode spm_normalize spm_train spm_export_vocab
-noinst_PROGRAMS = compile_charsmap
-
-spm_encode_SOURCES = spm_encode_main.cc
-spm_encode_LDADD = libsentencepiece.la
-
-spm_decode_SOURCES = spm_decode_main.cc
-spm_decode_LDADD = libsentencepiece.la
-
-spm_normalize_SOURCES = spm_normalize_main.cc
-spm_normalize_LDADD = libsentencepiece_train.la libsentencepiece.la
-
-spm_export_vocab_SOURCES = spm_export_vocab_main.cc
-spm_export_vocab_LDADD = libsentencepiece.la
-
-spm_train_SOURCES = spm_train_main.cc
-spm_train_LDADD = libsentencepiece_train.la libsentencepiece.la
-
-compile_charsmap_SOURCES = compile_charsmap_main.cc
-compile_charsmap_LDADD = libsentencepiece_train.la libsentencepiece.la
-
-
-check_PROGRAMS = spm_test
-TESTS = spm_test
-spm_test_SOURCES = testharness.h \
-                   builder_test.cc \
-	           flags_test.cc \
-	           normalizer_test.cc \
-	           sentencepiece_processor_test.cc \
-	           sentencepiece_trainer_test.cc \
-	           unicode_script_test.cc \
-	           model_interface_test.cc \
-	           model_factory_test.cc \
-	           trainer_interface_test.cc \
-	           trainer_factory_test.cc \
-	           word_model_test.cc \
-	           word_model_trainer_test.cc \
-		   bpe_model_test.cc \
-		   bpe_model_trainer_test.cc \
-	           char_model_test.cc \
-	           char_model_trainer_test.cc \
-	           unigram_model_test.cc\
-	           unigram_model_trainer_test.cc \
-                   util_test.cc \
-	           test_main.cc \
-                   testharness.cc
-
-spm_test_LDADD = libsentencepiece_train.la libsentencepiece.la
-
-CLEANFILES = *.pb.cc *.pb.h *.pb.h *.gcda *.gcno *.info
-clean-local:
-	-rm -rf lcov_html
-
-%.pb.cc %.pb.h: %.proto
-	$(PROTOC) --cpp_out=$(srcdir) $<
-
-coverage:
-	make clean
-	make -j10 CXXFLAGS+="-O0 -Wall -std=c++11 -coverage" LIBS+="-lgcov -lprotobuf" check
-	lcov -c -d . -o coverage.info
-	lcov --remove coverage.info "include*" "/c++" "_test*" "testharness*" "third_party*" ".pb.*" -o coverage.info
-	mkdir -p lcov_html
-	genhtml -o lcov_html coverage.info
diff --git a/src/bpe_model_trainer_test.cc b/src/bpe_model_trainer_test.cc
index 71d49ba..01e3864 100644
--- a/src/bpe_model_trainer_test.cc
+++ b/src/bpe_model_trainer_test.cc
@@ -16,11 +16,14 @@
 
 #include <string>
 #include <vector>
+#include "flags.h"
 #include "sentencepiece_processor.h"
 #include "sentencepiece_trainer.h"
 #include "testharness.h"
 #include "util.h"
 
+DECLARE_string(data_dir);
+
 namespace sentencepiece {
 namespace bpe {
 namespace {
@@ -87,13 +90,14 @@ TEST(BPETrainerTest, BasicTest) {
 TEST(BPETrainerTest, EndToEndTest) {
   const test::ScopedTempFile sf("tmp_model");
 
-  EXPECT_OK(SentencePieceTrainer::Train(
-      std::string("--model_prefix=") + sf.filename() +
-      " --input=../data/wagahaiwa_nekodearu.txt"
-      " --vocab_size=8000"
-      " --normalization_rule_name=identity"
-      " --model_type=bpe"
-      " --control_symbols=<ctrl>"));
+  EXPECT_OK(SentencePieceTrainer::Train(std::string("--model_prefix=") +
+                                        sf.filename() +
+                                        " --input=" + FLAGS_data_dir +
+                                        "/wagahaiwa_nekodearu.txt"
+                                        " --vocab_size=8000"
+                                        " --normalization_rule_name=identity"
+                                        " --model_type=bpe"
+                                        " --control_symbols=<ctrl>"));
 
   SentencePieceProcessor sp;
   EXPECT_OK(sp.Load(std::string(sf.filename()) + ".model"));
diff --git a/src/builder.cc b/src/builder.cc
index be5e45d..e42503d 100644
--- a/src/builder.cc
+++ b/src/builder.cc
@@ -17,6 +17,8 @@
 #include <functional>
 #include <utility>
 
+#include "config.h"
+
 #ifdef ENABLE_NFKC_COMPILE
 #include <unicode/errorcode.h>
 #include <unicode/locid.h>
@@ -326,7 +328,8 @@ util::Status Builder::BuildNFKCMap(CharsMap *chars_map) {
     if (nfkc == nfkd) {
       continue;
     }
-    // Expand all possible sequences which are normalized into the same `nfkd`.
+    // Expand all possible sequences which are normalized into the same
+    // `nfkd`.
     for (const auto &nfkd_orig : ExpandUnnormalized(nfkd, norm2orig)) {
       if (nfkd_orig != nfkc) {
         nfkc_map[nfkd_orig] = nfkc;
diff --git a/src/builder_test.cc b/src/builder_test.cc
index 212d3d1..a3af444 100644
--- a/src/builder_test.cc
+++ b/src/builder_test.cc
@@ -14,11 +14,14 @@
 
 #include "builder.h"
 #include "common.h"
+#include "flags.h"
 #include "normalizer.h"
 #include "sentencepiece_trainer.h"
 #include "testharness.h"
 #include "util.h"
 
+DECLARE_string(data_dir);
+
 namespace sentencepiece {
 namespace normalizer {
 
@@ -135,7 +138,7 @@ TEST(BuilderTest, CompileCharsMap) {
 
 TEST(BuilderTest, LoadCharsMapTest) {
   Builder::CharsMap chars_map;
-  EXPECT_OK(Builder::LoadCharsMap("../data/nfkc.tsv", &chars_map));
+  EXPECT_OK(Builder::LoadCharsMap(FLAGS_data_dir + "/nfkc.tsv", &chars_map));
 
   std::string precompiled, expected;
   EXPECT_OK(Builder::CompileCharsMap(chars_map, &precompiled));
diff --git a/src/common.h b/src/common.h
index 4516be9..7e75bda 100644
--- a/src/common.h
+++ b/src/common.h
@@ -24,9 +24,7 @@
 #include <utility>
 #include <vector>
 
-#ifdef HAVE_CONFIG_H
 #include "config.h"
-#endif
 
 #if defined(_WIN32) && !defined(__CYGWIN__)
 #define OS_WIN
diff --git a/src/flags.cc b/src/flags.cc
index c33e035..830c2cb 100644
--- a/src/flags.cc
+++ b/src/flags.cc
@@ -23,11 +23,8 @@
 #include <utility>
 
 #include "common.h"
-#include "util.h"
-
-#ifdef HAVE_CONFIG_H
 #include "config.h"
-#endif
+#include "util.h"
 
 namespace sentencepiece {
 namespace flags {
diff --git a/src/sentencepiece_trainer_test.cc b/src/sentencepiece_trainer_test.cc
index 0c2107d..ead34c1 100644
--- a/src/sentencepiece_trainer_test.cc
+++ b/src/sentencepiece_trainer_test.cc
@@ -13,31 +13,36 @@
 // limitations under the License.!
 
 #include "sentencepiece_trainer.h"
+#include "flags.h"
 #include "sentencepiece_model.pb.h"
 #include "testharness.h"
 #include "util.h"
 
+DECLARE_string(data_dir);
+
 namespace sentencepiece {
 namespace {
 
 TEST(SentencePieceTrainerTest, TrainFromArgsTest) {
-  SentencePieceTrainer::Train(
-      "--input=../data/botchan.txt --model_prefix=m --vocab_size=1000");
-  SentencePieceTrainer::Train(
-      "--input=../data/botchan.txt --model_prefix=m --vocab_size=1000 "
-      "--model_type=bpe");
-  SentencePieceTrainer::Train(
-      "--input=../data/botchan.txt --model_prefix=m --vocab_size=1000 "
-      "--model_type=char");
-  SentencePieceTrainer::Train(
-      "--input=../data/botchan.txt --model_prefix=m --vocab_size=1000 "
-      "--model_type=word");
+  std::string input = FLAGS_data_dir + "/botchan.txt";
+  SentencePieceTrainer::Train(std::string("--input=") + input +
+                              " --model_prefix=m --vocab_size=1000");
+  SentencePieceTrainer::Train(std::string("--input=") + input +
+                              " --model_prefix=m --vocab_size=1000 "
+                              "--model_type=bpe");
+  SentencePieceTrainer::Train(std::string("--input=") + input +
+                              " --model_prefix=m --vocab_size=1000 "
+                              "--model_type=char");
+  SentencePieceTrainer::Train(std::string("--input=") + input +
+                              " --model_prefix=m --vocab_size=1000 "
+                              "--model_type=word");
 }
 
 TEST(SentencePieceTrainerTest, TrainWithCustomNormalizationRule) {
-  SentencePieceTrainer::Train(
-      "--input=../data/botchan.txt --model_prefix=m --vocab_size=1000 "
-      "--normalization_rule_tsv=../data/nfkc.tsv");
+  SentencePieceTrainer::Train("--input=" + FLAGS_data_dir +
+                              "/botchan.txt --model_prefix=m --vocab_size=1000 "
+                              "--normalization_rule_tsv=" +
+                              FLAGS_data_dir + "/nfkc.tsv");
 }
 
 TEST(SentencePieceTrainerTest, TrainErrorTest) {
@@ -50,7 +55,7 @@ TEST(SentencePieceTrainerTest, TrainErrorTest) {
 
 TEST(SentencePieceTrainerTest, TrainTest) {
   TrainerSpec trainer_spec;
-  trainer_spec.add_input("../data/botchan.txt");
+  trainer_spec.add_input(FLAGS_data_dir + "/botchan.txt");
   trainer_spec.set_model_prefix("m");
   trainer_spec.set_vocab_size(1000);
   NormalizerSpec normalizer_spec;
diff --git a/src/test_main.cc b/src/test_main.cc
index 000d013..9ec2b3f 100644
--- a/src/test_main.cc
+++ b/src/test_main.cc
@@ -12,9 +12,15 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.!
 
+#include "flags.h"
 #include "testharness.h"
 
+DEFINE_string(data_dir, "../data", "Data directory");
+
 int main(int argc, char **argv) {
+  std::vector<std::string> rest_args;
+  sentencepiece::flags::ParseCommandLineFlags(argc, argv, &rest_args);
+
   sentencepiece::test::RunAllTests();
   return 0;
 }
diff --git a/src/unigram_model_trainer_test.cc b/src/unigram_model_trainer_test.cc
index aa60427..c7164fa 100644
--- a/src/unigram_model_trainer_test.cc
+++ b/src/unigram_model_trainer_test.cc
@@ -13,12 +13,16 @@
 // limitations under the License.!
 
 #include "unigram_model_trainer.h"
+
+#include "flags.h"
 #include "sentencepiece_model.pb.h"
 #include "sentencepiece_processor.h"
 #include "sentencepiece_trainer.h"
 #include "testharness.h"
 #include "util.h"
 
+DECLARE_string(data_dir);
+
 namespace sentencepiece {
 namespace unigram {
 namespace {
@@ -38,7 +42,8 @@ TEST(UnigramTrainerTest, EndToEndTest) {
 
   EXPECT_OK(SentencePieceTrainer::Train(
       std::string("--model_prefix=") + sf.filename() +
-      " --input=../data/wagahaiwa_nekodearu.txt"
+      " --input=" + FLAGS_data_dir +
+      "/wagahaiwa_nekodearu.txt"
       " --vocab_size=8000"
       " --normalization_rule_name=identity"
       " --model_type=unigram"
author	Taku Kudo <taku@google.com>	2018-07-24 09:22:49 +0300
committer	Taku Kudo <taku@google.com>	2018-07-24 09:22:49 +0300
commit	89831f80c125335a56807a1c738f2f509d03d6d5 (patch)
tree	6d7dbfcaf2f195ae2d24017f6eee3049e2d7749f /src
parent	d64cc9ada66c4a601536b94e88132937f8768e9c (diff)