remove legacy files

author: Ales Tamchyna <tamchyna@ufal.mff.cuni.cz> 2015-01-06 19:52:19 +0300
committer: Ales Tamchyna <tamchyna@ufal.mff.cuni.cz> 2015-01-06 19:52:19 +0300
commit: 1970d46706fe80b917e385dfc0b8e7e91b5d62c6 (patch)
tree: b0e8cfbd22d6593c7cca7df1acf0b498f895ce4a /vw
parent: 887392b8c2b74ea3819685e6fdc639e312e13965 (diff)
5 files changed, 0 insertions, 486 deletions
diff --git a/vw/ExtractorConfig.cpp b/vw/ExtractorConfig.cpp
deleted file mode 100644
index 27bc570ba..000000000
--- a/vw/ExtractorConfig.cpp
+++ /dev/null
@@ -1,51 +0,0 @@
-#include "ExtractorConfig.h"
-#include "Util.h"
-
-#include <exception>
-#include <stdexcept>
-#include <algorithm>
-#include <set>
-
-using namespace std;
-using namespace boost::bimaps;
-using namespace Moses;
-
-namespace Discriminative
-{
-
-void ExtractorConfig::Load(const string &configFile)
-{
-  try {
-    IniReader reader(configFile);
-    m_sourceInternal  = reader.Get<bool>("features.source-internal", false);
-    m_sourceExternal  = reader.Get<bool>("features.source-external", false);
-    m_targetInternal  = reader.Get<bool>("features.target-internal", false);
-    m_sourceIndicator = reader.Get<bool>("features.source-indicator", false);
-    m_targetIndicator = reader.Get<bool>("features.target-indicator", false);
-    m_sourceTargetIndicator = reader.Get<bool>("features.source-target-indicator", false);
-    m_STSE = reader.Get<bool>("features.source-target-source-external", false);
-    m_paired          = reader.Get<bool>("features.paired", false);
-    m_bagOfWords      = reader.Get<bool>("features.bag-of-words", false);
-    m_mostFrequent    = reader.Get<bool>("features.most-frequent", false);
-    m_binnedScores    = reader.Get<bool>("features.binned-scores", false);
-    m_sourceTopic     = reader.Get<bool>("features.source-topic", false);
-    m_phraseFactor    = reader.Get<bool>("features.phrase-factor", false);
-    m_windowSize      = reader.Get<size_t>("features.window-size", 0);  
-
-    m_factors = Scan<size_t>(Tokenize(reader.Get<string>("features.factors", ""), ","));
-    m_scoreIndexes = Scan<size_t>(Tokenize(reader.Get<string>("features.scores", ""), ","));
-    m_scoreBins = Scan<float>(Tokenize(reader.Get<string>("features.score-bins", ""), ","));
-
-    m_vwOptsTrain = reader.Get<string>("vw-options.train", "");
-    m_vwOptsPredict = reader.Get<string>("vw-options.predict", "");
-
-    m_normalization = reader.Get<string>("decoder.normalization", "");
-
-    m_isLoaded = true;
-  } catch (const runtime_error &err) {
-    cerr << "Error loading file " << configFile << ": " << err.what();
-    m_isLoaded = false;
-  }
-}
-
-} // namespace Discriminative
diff --git a/vw/ExtractorConfig.h b/vw/ExtractorConfig.h
deleted file mode 100644
index 23e136da7..000000000
--- a/vw/ExtractorConfig.h
+++ /dev/null
@@ -1,57 +0,0 @@
-#ifndef moses_ExtractorConfig_h
-#define moses_ExtractorConfig_h
-
-#include <vector>
-#include <string>
-#include <map>
-#include <boost/bimap/bimap.hpp>
-#include "IniReader.h"
-
-namespace Discriminative
-{
-
-class ExtractorConfig
-{
-  public:
-    void Load(const std::string &configFile);
-    inline bool GetSourceExternal() const { return m_sourceExternal; }
-    inline bool GetSourceInternal() const { return m_sourceInternal; }
-    inline bool GetTargetInternal() const { return m_targetInternal; }
-    inline bool GetSourceIndicator() const { return m_sourceIndicator; }
-    inline bool GetTargetIndicator() const { return m_targetIndicator; }
-    inline bool GetSourceTargetIndicator() const { return m_sourceTargetIndicator; }
-    inline bool GetSTSE() const { return m_STSE; }
-    inline bool GetPhraseFactor() const   { return m_phraseFactor; }
-    inline bool GetPaired() const         { return m_paired; }
-    inline bool GetBagOfWords() const     { return m_bagOfWords; }
-    inline bool GetMostFrequent() const   { return m_mostFrequent; }
-    inline size_t GetWindowSize() const   { return m_windowSize; }
-    inline bool GetBinnedScores() const   { return m_binnedScores; }
-    inline bool GetSourceTopic() const    { return m_sourceTopic; }
-    inline const std::vector<size_t> &GetFactors() const { return m_factors; }
-    inline const std::vector<size_t> &GetScoreIndexes() const { return m_scoreIndexes; }
-    inline const std::vector<float> &GetScoreBins() const { return m_scoreBins; }
-    inline const std::string &GetVWOptionsTrain() const { return m_vwOptsTrain; }
-    inline const std::string &GetVWOptionsPredict() const { return m_vwOptsPredict; }
-    inline const std::string &GetNormalization() const { return m_normalization; }
-
-    inline bool IsLoaded() const { return m_isLoaded; }
-
-  private:
-    // read from configuration
-    bool m_paired, m_bagOfWords, m_sourceExternal,
-         m_sourceInternal, m_targetInternal, m_mostFrequent,
-         m_binnedScores, m_sourceIndicator, m_targetIndicator, 
-         m_sourceTargetIndicator, m_STSE, m_sourceTopic, m_phraseFactor;
-    std::string m_vwOptsPredict, m_vwOptsTrain, m_normalization;
-    size_t m_windowSize;
-    std::vector<size_t> m_factors, m_scoreIndexes;
-    std::vector<float> m_scoreBins;
-
-    // internal variables
-    bool m_isLoaded;
-};
-
-} // namespace Discriminative
-
-#endif // moses_ExtractorConfig_h
diff --git a/vw/FeatureExtractor.cpp b/vw/FeatureExtractor.cpp
deleted file mode 100644
index 63a45ccc4..000000000
--- a/vw/FeatureExtractor.cpp
+++ /dev/null
@@ -1,229 +0,0 @@
-#include "FeatureExtractor.h"
-#include "Util.h"
-
-using namespace std;
-using namespace Moses;
-
-namespace Discriminative
-{
-
-FeatureExtractor::FeatureExtractor(const ExtractorConfig &config, bool train)
-  : m_config(config), m_train(train)
-{  
-  if (! m_config.IsLoaded())
-    throw logic_error("configuration file not loaded");
-}
-
-void FeatureExtractor::GenerateFeatures(Classifier *fc,
-  const ContextType &context,
-  size_t spanStart,
-  size_t spanEnd,
-  const vector<Translation> &translations,
-  vector<float> &losses)
-{  
-  fc->SetNamespace('s', true);
-
-  if (m_config.GetSourceExternal()) GenerateContextFeatures(context, spanStart, spanEnd, fc);
-
-  // get words (surface forms) in source phrase
-  vector<string> sourceForms(spanEnd - spanStart + 1);
-  for (size_t i = spanStart; i <= spanEnd; i++)
-    sourceForms[i - spanStart] = context[i][FACTOR_FORM]; 
-  
-  map<string, float> maxProbs;
-  if (m_config.GetMostFrequent()) maxProbs = GetMaxProb(translations);
-
-  if (m_config.GetSourceInternal()) GenerateInternalFeatures(sourceForms, fc);
-  if (m_config.GetPhraseFactor()) GeneratePhraseFactorFeatures(context, spanStart, spanEnd, fc);
-  if (m_config.GetBagOfWords()) GenerateBagOfWordsFeatures(context, spanStart, spanEnd, FACTOR_FORM, fc);
-
-	if (m_config.GetSourceIndicator()) GenerateIndicatorFeature(sourceForms, fc); 
-
-  vector<Translation>::const_iterator transIt = translations.begin();
-  vector<float>::iterator lossIt = losses.begin();
-  for (; transIt != translations.end(); transIt++, lossIt++) {
-    assert(lossIt != losses.end());
-    fc->SetNamespace('t', false);
-
-    // get words in target phrase
-    const vector<string> &targetForms = transIt->translation;
-
-    if (m_config.GetTargetInternal()) GenerateInternalFeatures(targetForms, fc);
-    if (m_config.GetPaired()) GeneratePairedFeatures(sourceForms, targetForms, transIt->m_alignment, fc);
-
-    if (m_config.GetMostFrequent()) GenerateMostFrequentFeature(transIt->m_ttableScores, maxProbs, fc);
-
-    if (m_config.GetBinnedScores()) GenerateScoreFeatures(transIt->m_ttableScores, fc);
-
-    // "NOT_IN_" features
-    if (m_config.GetBinnedScores() || m_config.GetMostFrequent()) GenerateTTableEntryFeatures(transIt->m_ttableScores, fc);
-
-		if (m_config.GetTargetIndicator()) GenerateIndicatorFeature(targetForms, fc); 
-
-		if (m_config.GetSourceTargetIndicator()) GenerateConcatIndicatorFeature(sourceForms, targetForms, fc); 
-
-		if (m_config.GetSTSE()) GenerateSTSE(sourceForms, targetForms, context, spanStart, spanEnd, fc); 
-
-    if (m_train) {
-      fc->Train(SPrint(DUMMY_IDX), *lossIt);
-    } else {
-      *lossIt = fc->Predict(SPrint(DUMMY_IDX));
-    }
-  }
-  fc->FinishExample();
-}
-
-//
-// private methods
-//
-
-string FeatureExtractor::BuildContextFeature(size_t factor, int index, const string &value)
-{
-  return "c^" + SPrint(factor) + "_" + SPrint(index) + "_" + value;
-}
-
-void FeatureExtractor::GenerateContextFeatures(const ContextType &context,
-  size_t spanStart,
-  size_t spanEnd,
-  Classifier *fc)
-{
-  vector<size_t>::const_iterator factIt;
-  for (factIt = m_config.GetFactors().begin(); factIt != m_config.GetFactors().end(); factIt++) {
-    for (size_t i = 1; i <= m_config.GetWindowSize(); i++) {
-      string left = "<s>";
-      string right = "</s>"; 
-      if (spanStart >= i)
-        left = context[spanStart - i][*factIt];
-      fc->AddFeature(BuildContextFeature(*factIt, -i, left));
-      if (spanEnd + i < context.size()) 
-        right = context[spanEnd + i][*factIt];
-      fc->AddFeature(BuildContextFeature(*factIt, i, right));
-    }
-  }
-}
-
-void FeatureExtractor::GenerateIndicatorFeature(const vector<string> &span, Classifier *fc)
-{
-  fc->AddFeature("p^" + Join("_", span));
-}
-
-void FeatureExtractor::GenerateConcatIndicatorFeature(const vector<string> &span1, const vector<string> &span2, Classifier *fc)
-{
-  fc->AddFeature("p^" + Join("_", span1) + "^" + Join("_", span2));
-}
-
-void FeatureExtractor::GenerateSTSE(const vector<string> &span1, const vector<string> &span2, 
-  const ContextType &context,
-  size_t spanStart,
-  size_t spanEnd,
-  Classifier *fc)
-{
-  vector<size_t>::const_iterator factIt;
-  for (factIt = m_config.GetFactors().begin(); factIt != m_config.GetFactors().end(); factIt++) {
-    for (size_t i = 1; i <= m_config.GetWindowSize(); i++) {
-      string left = "<s>";
-      string right = "</s>"; 
-      if (spanStart >= i)
-        left = context[spanStart - i][*factIt];
-      fc->AddFeature("stse^" + Join("_", span1) + "^" + Join("_", span2) + BuildContextFeature(*factIt, -i, left));
-      if (spanEnd + i < context.size()) 
-        right = context[spanEnd + i][*factIt];
-      fc->AddFeature("stse^" + Join("_", span1) + "^" + Join("_", span2) + BuildContextFeature(*factIt, i, right));
-    }
-  }
-}
-
-void FeatureExtractor::GenerateInternalFeatures(const vector<string> &span, Classifier *fc)
-{
-  vector<string>::const_iterator it;
-  for (it = span.begin(); it != span.end(); it++) {
-    fc->AddFeature("w^" + *it);
-  }
-}
-
-void FeatureExtractor::GenerateBagOfWordsFeatures(const ContextType &context, size_t spanStart, size_t spanEnd, size_t factorID, Classifier *fc)
-{
-  for (size_t i = 0; i < spanStart; i++)
-    fc->AddFeature("bow^" + context[i][factorID]);
-  for (size_t i = spanEnd + 1; i < context.size(); i++)
-    fc->AddFeature("bow^" + context[i][factorID]);
-}
-
-void FeatureExtractor::GeneratePhraseFactorFeatures(const ContextType &context, size_t spanStart, size_t spanEnd, Classifier *fc)
-{
-  for (size_t i = spanStart; i <= spanEnd; i++) {
-    vector<size_t>::const_iterator factIt;
-    for (factIt = m_config.GetFactors().begin(); factIt != m_config.GetFactors().end(); factIt++) {
-      fc->AddFeature("ibow^" + SPrint(*factIt) + "_" + context[i][*factIt]);
-    }
-  }
-}
-
-void FeatureExtractor::GeneratePairedFeatures(const vector<string> &srcPhrase, const vector<string> &tgtPhrase, 
-    const AlignmentType &align, Classifier *fc)
-{
-  AlignmentType::const_iterator it;
-  set<size_t> srcAligned;
-  set<size_t> tgtAligned;
-
-  for (it = align.begin(); it != align.end(); it++) {
-    fc->AddFeature("pair^" + srcPhrase[it->first] + "^" + tgtPhrase[it->second]);
-    srcAligned.insert(it->first);
-    tgtAligned.insert(it->second);
-  }
-
-  for (size_t i = 0; i < srcPhrase.size(); i++) {
-    if (srcAligned.count(i) == 0)
-      fc->AddFeature("pair^" + srcPhrase[i] + "^NULL");
-  }
-
-  for (size_t i = 0; i < tgtPhrase.size(); i++) {
-    if (tgtAligned.count(i) == 0)
-      fc->AddFeature("pair^NULL^" + tgtPhrase[i]);
-  }
-}
-
-void FeatureExtractor::GenerateScoreFeatures(const std::vector<TTableEntry> &ttableScores, Classifier *fc)
-{
-  vector<size_t>::const_iterator scoreIt;
-  vector<float>::const_iterator binIt;
-  vector<TTableEntry>::const_iterator tableIt;
-  const vector<size_t> &scoreIDs = m_config.GetScoreIndexes();
-  const vector<float> &bins = m_config.GetScoreBins();
-
-  for (tableIt = ttableScores.begin(); tableIt != ttableScores.end(); tableIt++) {
-    if (! tableIt->m_exists)
-      continue;
-    string prefix = ttableScores.size() == 1 ? "" : tableIt->m_id + "_";
-    for (scoreIt = scoreIDs.begin(); scoreIt != scoreIDs.end(); scoreIt++) {
-      for (binIt = bins.begin(); binIt != bins.end(); binIt++) {
-        float logScore = log(tableIt->m_scores[*scoreIt]);
-        if (logScore < *binIt || Equals(logScore, *binIt)) {
-          fc->AddFeature(prefix + "sc^" + SPrint<size_t>(*scoreIt) + "_" + SPrint(*binIt));
-        }
-      }
-    }
-  }
-}
-
-void FeatureExtractor::GenerateMostFrequentFeature(const std::vector<TTableEntry> &ttableScores, const map<string, float> &maxProbs, Classifier *fc)
-{
-  vector<TTableEntry>::const_iterator it;
-  for (it = ttableScores.begin(); it != ttableScores.end(); it++) {
-    if (it->m_exists && Equals(it->m_scores[P_E_F_INDEX], maxProbs.find(it->m_id)->second)) {
-      string prefix = ttableScores.size() == 1 ? "" : it->m_id + "_";
-      fc->AddFeature(prefix + "MOST_FREQUENT");
-    }
-  }
-}
-
-void FeatureExtractor::GenerateTTableEntryFeatures(const std::vector<TTableEntry> &ttableScores, Classifier *fc)
-{
-  vector<TTableEntry>::const_iterator it;
-  for (it = ttableScores.begin(); it != ttableScores.end(); it++) {
-    if (! it->m_exists)
-      fc->AddFeature("NOT_IN_" + it->m_id);
-  }
-}
-
-} // namespace Discriminative
diff --git a/vw/FeatureExtractor.h b/vw/FeatureExtractor.h
deleted file mode 100644
index 2edbd331e..000000000
--- a/vw/FeatureExtractor.h
+++ /dev/null
@@ -1,88 +0,0 @@
-#ifndef moses_FeatureExtractor_h
-#define moses_FeatureExtractor_h
-
-#include "Classifier.h"
-#include "ExtractorConfig.h"
-
-#include <vector>
-#include <string>
-#include <exception>
-#include <stdexcept>
-#include <map>
-
-namespace Discriminative
-{
-
-// label index passed to the classifier, this value is not used in our setting
-const int DUMMY_IDX = 1111;
-
-// vector of words, each word is a vector of factors
-typedef std::vector<std::vector<std::string> > ContextType; 
-
-typedef std::multimap<size_t, size_t> AlignmentType;
-
-// In DA scenario, there are multiple phrase tables. This struct
-// contains scores for a phrase in one phrase-table.
-struct TTableEntry
-{
-  std::string m_id;            // phrase-table identifier
-  bool m_exists;               // does translation exist in this table 
-  std::vector<float> m_scores; // translation scores (empty if m_exists == false)
-};
-
-// One translation (phrase target side). 
-struct Translation
-{
-  std::vector<std::string> translation;    // words (surface forms) of translation
-  AlignmentType m_alignment;               // phrase-internal word alignment
-  std::vector<TTableEntry> m_ttableScores; // phrase scores in each phrase table
-};
-
-// extract features
-class FeatureExtractor
-{
-public:
-  FeatureExtractor(const ExtractorConfig &config, bool train);
-
-  // Generate features for current source phrase and all its translation options, based on 
-  // configuration. Calls all auxiliary Generate* methods.
-  //
-  // In training, reads the &losses parameter and passes them to VW. In prediction, &losses is 
-  // an output variable where VW scores are written.
-  void GenerateFeatures(Classifier *fc,
-    const ContextType &context,
-    size_t spanStart,
-    size_t spanEnd,
-    const std::vector<Translation> &translations,
-    std::vector<float> &losses);
-
-private:
-  const ExtractorConfig &m_config;      // Configuration of features.
-  bool m_train;                         // Train or predict.
-
-  // Get the highest probability P(e|f) associated with any of the translation options,
-  // separately for each phrase table (string keys are phrase-table IDs).
-  std::map<std::string, float> GetMaxProb(const std::vector<Translation> &translations);
-
-  void GenerateContextFeatures(const ContextType &context, size_t spanStart, size_t spanEnd, Classifier *fc);
-  void GeneratePhraseFactorFeatures(const ContextType &context, size_t spanStart, size_t spanEnd, Classifier *fc);
-  void GenerateInternalFeatures(const std::vector<std::string> &span, Classifier *fc);
-  void GenerateIndicatorFeature(const std::vector<std::string> &span, Classifier *fc);
-  void GenerateConcatIndicatorFeature(const std::vector<std::string> &span1, const std::vector<std::string> &span2, Classifier *fc);
-  void GenerateSTSE(const std::vector<std::string> &span1, const std::vector<std::string> &span2, const ContextType &context, size_t spanStart, size_t spanEnd, Classifier *fc);
-  void GenerateBagOfWordsFeatures(const ContextType &context, size_t spanStart, size_t spanEnd, size_t factorID, Classifier *fc);
-  void GeneratePairedFeatures(const std::vector<std::string> &srcPhrase,
-      const std::vector<std::string> &tgtPhrase,
-      const AlignmentType &align,
-      Classifier *fc);
-  void GenerateScoreFeatures(const std::vector<TTableEntry> &ttableScores, Classifier *fc);
-  void GenerateMostFrequentFeature(const std::vector<TTableEntry> &ttableScores,
-      const std::map<std::string, float> &maxProbs,
-      Classifier *fc);
-  void GenerateTTableEntryFeatures(const std::vector<TTableEntry> &ttableScores, Classifier *fc);
-  std::string BuildContextFeature(size_t factor, int index, const std::string &value);
-};
-
-} // namespace Discriminative
-
-#endif // moses_FeatureExtractor_h
diff --git a/vw/IniReader.h b/vw/IniReader.h
deleted file mode 100644
index 528491c8a..000000000
--- a/vw/IniReader.h
+++ /dev/null
@@ -1,61 +0,0 @@
-#ifndef moses_iniReader_h
-#define moses_iniReader_h
-
-#include <vector>
-#include <algorithm>
-#include <string>
-#include <fstream>
-#include <map>
-#include <exception>
-#include <stdexcept>
-
-#include <boost/algorithm/string.hpp>
-#include <boost/bind.hpp>
-#include <boost/algorithm/string/classification.hpp>
-#include <boost/lexical_cast.hpp>
-
-// simple reader of .ini files
-class IniReader {
-public:
-  IniReader(const std::string &file)
-  {
-    std::ifstream inStr(file.c_str());
-    if (! inStr.is_open())
-      throw std::runtime_error("Failed to open file " + file);
-
-    std::string section = "";
-    std::string line;
-    while (getline(inStr, line)) {
-      if (line.empty() || line[0] == ';' || line[0] == '#') {
-        // empty line or comment, do nothing
-      } else if (line[0] == '[') {
-        // new section
-        section = line.substr(1, line.size() - 2);        
-      } else {
-        std::vector<std::string> cols;
-        boost::split(cols, line, boost::is_any_of("="));
-        std::for_each(cols.begin(), cols.end(),
-          boost::bind(&boost::trim<std::string>, _1, std::locale()));
-        if (section.empty())
-          throw std::runtime_error("Missing section");
-        if (cols.size() != 2)
-          throw std::runtime_error("Failed to parse line: '" + line + "'");
-        std::string key = section + "." + cols[0];
-        properties[key] = cols[1];
-      }
-    }
-    inStr.close();
-  }
-
-  template <class T>
-  T Get(const std::string &key, T defaultValue)
-  {
-    std::map<std::string, std::string>::const_iterator it = properties.find(key);
-    return (it == properties.end()) ? defaultValue : boost::lexical_cast<T>(it->second);
-  }
-
-private:
-  std::map<std::string, std::string> properties;
-};
-
-#endif // moses_iniReader_h
author	Ales Tamchyna <tamchyna@ufal.mff.cuni.cz>	2015-01-06 19:52:19 +0300
committer	Ales Tamchyna <tamchyna@ufal.mff.cuni.cz>	2015-01-06 19:52:19 +0300
commit	1970d46706fe80b917e385dfc0b8e7e91b5d62c6 (patch)
tree	b0e8cfbd22d6593c7cca7df1acf0b498f895ce4a /vw
parent	887392b8c2b74ea3819685e6fdc639e312e13965 (diff)