Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorHieu Hoang <hieuhoang@gmail.com>2012-11-12 23:56:18 +0400
committerHieu Hoang <hieuhoang@gmail.com>2012-11-12 23:56:18 +0400
commit5e3ef23cef6101d2c098eb3445f562e8f595655b (patch)
treeb8c332b6fa82bae84ea4910967a10ba1b08a7107 /moses/FeatureVector.cpp
parent8c785cff2b1be3cccd76ea9026f71b649762dfc3 (diff)
move moses/src/* to moses/
Diffstat (limited to 'moses/FeatureVector.cpp')
-rw-r--r--moses/FeatureVector.cpp773
1 files changed, 773 insertions, 0 deletions
diff --git a/moses/FeatureVector.cpp b/moses/FeatureVector.cpp
new file mode 100644
index 000000000..c01775fd1
--- /dev/null
+++ b/moses/FeatureVector.cpp
@@ -0,0 +1,773 @@
+/*
+ Moses - factored phrase-based language decoder
+ Copyright (C) 2010 University of Edinburgh
+
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License along
+ with this program; if not, write to the Free Software Foundation, Inc.,
+ 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+ */
+
+#include <algorithm>
+#include <cmath>
+#include <fstream>
+#include <sstream>
+#include <stdexcept>
+
+#include "FeatureVector.h"
+
+using namespace std;
+
+
+namespace Moses {
+
+ // Storage for the static members shared by every FName.
+ // NOTE(review): SEP is presumably the separator used when composing feature
+ // names; its use is not visible in this file — confirm against the header.
+ const string FName::SEP = "_";
+ // Feature name -> numeric id; entries are added in FName::init.
+ FName::Name2Id FName::name2id;
+ // Reverse table: position is the id, element is the name (read by FName::name()).
+ vector<string> FName::id2name;
+ // Per-id counters incremented by incrementHopeId / incrementFearId.
+ FName::Id2Count FName::id2hopeCount;
+ FName::Id2Count FName::id2fearCount;
+#ifdef WITH_THREADS
+ // Lock taken by init, the increment functions and eraseId in threaded builds.
+ boost::shared_mutex FName::m_idLock;
+#endif
+
+ /**
+  * Bind this FName to the id registered for `name`. If the name is unknown,
+  * register it under the next free id (the current table size) and record it
+  * in id2name as well.
+  */
+ void FName::init(const string& name) {
+#ifdef WITH_THREADS
+   //reader lock
+   boost::shared_lock<boost::shared_mutex> lock(m_idLock);
+#endif
+   Name2Id::iterator i = name2id.find(name);
+   if (i != name2id.end()) {
+     m_id = i->second;
+   } else {
+#ifdef WITH_THREADS
+     //release the reader lock, and upgrade to writer lock
+     lock.unlock();
+     boost::unique_lock<boost::shared_mutex> write_lock(m_idLock);
+     // Another thread may have registered the name while no lock was held, so
+     // the lookup has to be repeated; the iterator obtained under the reader
+     // lock is stale and would always report "not found".
+     i = name2id.find(name);
+#endif
+     if (i != name2id.end()) {
+       m_id = i->second;
+     } else {
+       m_id = name2id.size();
+       name2id[name] = m_id;
+       id2name.push_back(name);
+     }
+   }
+ }
+
+ /** Return the id registered for `name`; the name must already be known (asserted). */
+ size_t FName::getId(const string& name) {
+   const Name2Id::iterator entry = name2id.find(name);
+   assert (entry != name2id.end());
+   return entry->second;
+ }
+
+ /**
+  * Return the hope counter recorded for `name`, or 0 when the name is unknown
+  * or no counter has been recorded for its id.
+  */
+ size_t FName::getHopeIdCount(const string& name) {
+   Name2Id::iterator i = name2id.find(name);
+   if (i == name2id.end()) {
+     return 0;
+   }
+   // Keep the id integral: a float cannot represent every size_t exactly.
+   const size_t id = i->second;
+   // find() instead of operator[] so that a read never inserts a new entry.
+   Id2Count::iterator count = id2hopeCount.find(id);
+   if (count == id2hopeCount.end()) {
+     return 0;
+   }
+   return count->second;
+ }
+
+ /**
+  * Return the fear counter recorded for `name`, or 0 when the name is unknown
+  * or no counter has been recorded for its id.
+  */
+ size_t FName::getFearIdCount(const string& name) {
+   Name2Id::iterator i = name2id.find(name);
+   if (i == name2id.end()) {
+     return 0;
+   }
+   // Keep the id integral: a float cannot represent every size_t exactly.
+   const size_t id = i->second;
+   // find() instead of operator[] so that a read never inserts a new entry.
+   Id2Count::iterator count = id2fearCount.find(id);
+   if (count == id2fearCount.end()) {
+     return 0;
+   }
+   return count->second;
+ }
+
+ /**
+  * Add one to the hope counter of the feature called `name`.
+  * The name must already be registered (asserted).
+  */
+ void FName::incrementHopeId(const string& name) {
+#ifdef WITH_THREADS
+   // get upgradable lock and upgrade to writer lock
+   boost::upgrade_lock<boost::shared_mutex> upgradeLock(m_idLock);
+   boost::upgrade_to_unique_lock<boost::shared_mutex> uniqueLock(upgradeLock);
+#endif
+   // The lookup happens under the lock so that a concurrent FName::init cannot
+   // modify name2id between the find and the use of the iterator.
+   Name2Id::iterator i = name2id.find(name);
+   assert(i != name2id.end());
+   id2hopeCount[i->second] += 1;
+ }
+
+ /**
+  * Add one to the fear counter of the feature called `name`.
+  * The name must already be registered (asserted).
+  */
+ void FName::incrementFearId(const string& name) {
+#ifdef WITH_THREADS
+   // get upgradable lock and upgrade to writer lock
+   boost::upgrade_lock<boost::shared_mutex> upgradeLock(m_idLock);
+   boost::upgrade_to_unique_lock<boost::shared_mutex> uniqueLock(upgradeLock);
+#endif
+   // The lookup happens under the lock so that a concurrent FName::init cannot
+   // modify name2id between the find and the use of the iterator.
+   Name2Id::iterator i = name2id.find(name);
+   assert(i != name2id.end());
+   id2fearCount[i->second] += 1;
+ }
+
+ /** Drop the hope and fear counters stored for `id`. */
+ void FName::eraseId(size_t id) {
+#ifdef WITH_THREADS
+   // Take the upgradable lock, then promote it to exclusive ownership.
+   boost::upgrade_lock<boost::shared_mutex> sharedSide(m_idLock);
+   boost::upgrade_to_unique_lock<boost::shared_mutex> exclusiveSide(sharedSide);
+#endif
+   id2fearCount.erase(id);
+   id2hopeCount.erase(id);
+ }
+
+ /** Stream the textual name of a feature. */
+ std::ostream& operator<<( std::ostream& out, const FName& name) {
+   return out << name.name();
+ }
+
+ /** Hash of this name, computed from its numeric id with boost::hash_value. */
+ size_t FName::hash() const {
+   const size_t digest = boost::hash_value(m_id);
+   return digest;
+ }
+
+ /** Text of this feature name, read from the shared id2name table. */
+ const std::string& FName::name() const {
+   const std::string& text = id2name[m_id];
+   return text;
+ }
+
+
+ /** Two names are equal exactly when their ids are equal. */
+ bool FName::operator==(const FName& rhs) const {
+   return rhs.m_id == m_id;
+ }
+
+ /** Negation of operator==: true when the ids differ. */
+ bool FName::operator!=(const FName& rhs) const {
+   return m_id != rhs.m_id;
+ }
+
+ /** Build a vector holding `coreFeatures` core slots and no sparse features. */
+ FVector::FVector(size_t coreFeatures)
+   : m_coreFeatures(coreFeatures)
+ {
+ }
+
+ /**
+  * Change the number of core features to `newsize`, keeping the values of the
+  * positions that exist both before and after the change.
+  */
+ void FVector::resize(size_t newsize) {
+   // Snapshot first: the array is resized and then refilled from this copy.
+   const valarray<FValue> saved(m_coreFeatures);
+   m_coreFeatures.resize(newsize);
+   const size_t keep = min(m_coreFeatures.size(), saved.size());
+   for (size_t k = 0; k < keep; ++k) {
+     m_coreFeatures[k] = saved[k];
+   }
+ }
+
+ /** Remove every sparse feature and shrink the core array to length zero. */
+ void FVector::clear() {
+   m_features.clear();
+   m_coreFeatures.resize(0);
+ }
+
+ /**
+  * Replace the contents of this vector with the sparse features read from
+  * `filename`. Each useful line holds a feature name followed by its value;
+  * lines starting with '#' are skipped, as are empty lines and lines that do
+  * not parse as "name value".
+  *
+  * @return false if the file cannot be opened, true otherwise.
+  */
+ bool FVector::load(const std::string& filename) {
+   clear();
+   ifstream in (filename.c_str());
+   if (!in) {
+     return false;
+   }
+   string line;
+   while(getline(in,line)) {
+     // Guard against empty lines before looking at the first character.
+     if (line.empty() || line[0] == '#') continue;
+     istringstream linestream(line);
+     string namestring;
+     FValue value;
+     // Skip lines without both fields instead of storing an unparsed value
+     // under an empty or partial name.
+     if (!(linestream >> namestring >> value)) continue;
+     FName fname(namestring);
+     set(fname,value);
+   }
+   return true;
+ }
+
+ /**
+  * Write the vector to `filename` using write().
+  * Throws runtime_error when the file cannot be opened.
+  */
+ void FVector::save(const string& filename) const {
+   ofstream out(filename.c_str());
+   if (out) {
+     write(out);
+     out.close();
+     return;
+   }
+   ostringstream problem;
+   problem << "Unable to open " << filename;
+   throw runtime_error(problem.str());
+ }
+
+ /** Emit one "name value" line per entry visited by cbegin()..cend(). */
+ void FVector::write(ostream& out) const {
+   const_iterator entry = cbegin();
+   while (entry != cend()) {
+     out << entry->first << " " << entry->second << endl;
+     ++entry;
+   }
+ }
+
+ /**
+  * Approximate equality: true when the values are identical, or when their
+  * absolute difference relative to the mean of their magnitudes is below 1e-4.
+  */
+ static bool equalsTolerance(FValue lhs, FValue rhs) {
+   static const FValue TOLERANCE = 1e-4;
+   if (lhs == rhs) {
+     return true;
+   }
+   const FValue gap = abs(lhs-rhs);
+   const FValue scale = (abs(lhs)+abs(rhs))/2;
+   return gap/scale < TOLERANCE;
+ }
+
+ /**
+  * Tolerant comparison. The core arrays must have the same length and match
+  * position by position; every sparse entry of either side must match the
+  * other side's value for that name (get() supplies 0 for absent names).
+  */
+ bool FVector::operator== (const FVector& rhs) const {
+   if (this == &rhs) return true;
+
+   const size_t coreCount = m_coreFeatures.size();
+   if (coreCount != rhs.m_coreFeatures.size()) return false;
+
+   for (size_t k = 0; k < coreCount; ++k) {
+     if (!equalsTolerance(m_coreFeatures[k], rhs.m_coreFeatures[k])) {
+       return false;
+     }
+   }
+   // Entries on our side, checked against rhs ...
+   for (const_iterator mine = cbegin(); mine != cend(); ++mine) {
+     if (!equalsTolerance(mine->second, rhs.get(mine->first))) {
+       return false;
+     }
+   }
+   // ... and entries on the rhs side, checked against ours.
+   for (const_iterator theirs = rhs.cbegin(); theirs != rhs.cend(); ++theirs) {
+     if (!equalsTolerance(theirs->second, get(theirs->first))) {
+       return false;
+     }
+   }
+   return true;
+ }
+
+ /** Negation of the tolerant operator==. */
+ bool FVector::operator!= (const FVector& rhs) const {
+   const bool same = (*this == rhs);
+   return !same;
+ }
+
+ /**
+  * Non-const access by feature name. How the result will be used is not known
+  * at this point, so a proxy bound to this vector and `name` is handed back
+  * and the decision is deferred to it.
+  */
+ ProxyFVector FVector::operator[](const FName& name) {
+   ProxyFVector proxy(this, name);
+   return proxy;
+ }
+
+ /** Mutable access to the core feature stored at position `index`. */
+ FValue& FVector::operator[](size_t index) {
+   FValue& slot = m_coreFeatures[index];
+   return slot;
+ }
+
+
+ /** Read-only access by feature name; same result as get(name), returned by value. */
+ FValue FVector::operator[](const FName& name) const {
+   const FValue stored = get(name);
+   return stored;
+ }
+
+ /** Read-only access to the core feature at position `index`, returned by value. */
+ FValue FVector::operator[](size_t index) const {
+   const FValue stored = m_coreFeatures[index];
+   return stored;
+ }
+
+ /**
+  * Print the vector as "core=(v0,v1,...) name=value name=value ...".
+  * Returns the stream it was given.
+  */
+ ostream& FVector::print(ostream& out) const {
+   out << "core=(";
+   const size_t coreCount = m_coreFeatures.size();
+   for (size_t k = 0; k < coreCount; ++k) {
+     // A comma goes between values, never after the last one.
+     if (k > 0) {
+       out << ",";
+     }
+     out << m_coreFeatures[k];
+   }
+   out << ") ";
+   bool first = true;
+   for (const_iterator entry = cbegin(); entry != cend(); ++entry) {
+     if (!first) {
+       out << " ";
+     }
+     first = false;
+     out << entry->first << "=" << entry->second;
+   }
+   return out;
+ }
+
+ /** Stream insertion for FVector; forwards to FVector::print. */
+ ostream& operator<<(ostream& out, const FVector& fv) {
+   fv.print(out);
+   return out;
+ }
+
+ /**
+  * Value stored for the sparse feature `name`. When the name is absent the
+  * result refers to a function-local static holding 0.
+  */
+ const FValue& FVector::get(const FName& name) const {
+   static const FValue DEFAULT = 0;
+   const const_iterator hit = m_features.find(name);
+   if (hit != m_features.end()) {
+     return hit->second;
+   }
+   return DEFAULT;
+ }
+
+ /**
+  * Look up the sparse feature `name`; yields the stored value when present and
+  * `backoff` otherwise.
+  *
+  * NOTE(review): `backoff` is a by-value parameter, so on the "absent" path the
+  * returned reference refers to an object whose lifetime ends when this
+  * function returns. Reading through it is undefined behaviour. A proper fix
+  * (returning FValue by value) requires changing the declaration in the
+  * header, which is not visible here.
+  */
+ const FValue& FVector::getBackoff(const FName& name, float backoff) const {
+ const_iterator fi = m_features.find(name);
+ if (fi == m_features.end()) {
+ // Dangling on return: refers to the local copy of the argument (see note above).
+ return backoff;
+ } else {
+ return fi->second;
+ }
+ }
+
+ /**
+  * Scale the whole vector (via operator*=) by a single factor chosen so that
+  * no sparse feature exceeds `maxValue` in magnitude. Vectors already within
+  * the bound are multiplied by 1.
+  *
+  * The factor is maxValue / |largest offending value|. The previous
+  * |value| / maxValue was greater than 1, so it enlarged the values it was
+  * meant to bound and made the result depend on iteration order.
+  */
+ void FVector::thresholdScale(FValue maxValue ) {
+   FValue factor = 1.0;
+   for (const_iterator i = cbegin(); i != cend(); ++i) {
+     FValue value = i->second;
+     // Would this value still be over the bound after the scaling chosen so far?
+     if (abs(value)*factor > maxValue) {
+       factor = maxValue / abs(value);
+     }
+   }
+   operator*=(factor);
+ }
+
+ /** Lower every sparse value that is above `maxValue` down to `maxValue`. */
+ void FVector::capMax(FValue maxValue) {
+   for (iterator entry = begin(); entry != end(); ++entry) {
+     if (entry->second > maxValue) {
+       entry->second = maxValue;
+     }
+   }
+ }
+
+ /** Raise every sparse value that is below `minValue` up to `minValue`. */
+ void FVector::capMin(FValue minValue) {
+   for (iterator entry = begin(); entry != end(); ++entry) {
+     if (entry->second < minValue) {
+       entry->second = minValue;
+     }
+   }
+ }
+
+ /** Store `value` under the sparse feature `name`, creating the entry if needed. */
+ void FVector::set(const FName& name, const FValue& value) {
+   FValue& slot = m_features[name];
+   slot = value;
+ }
+
+ /** Write the core features to cerr as "core=(v0,v1,...) ". */
+ void FVector::printCoreFeatures() {
+   cerr << "core=(";
+   const size_t coreCount = m_coreFeatures.size();
+   for (size_t k = 0; k < coreCount; ++k) {
+     // Comma between values only.
+     if (k > 0) {
+       cerr << ",";
+     }
+     cerr << m_coreFeatures[k];
+   }
+   cerr << ") ";
+ }
+
+ /**
+  * Element-wise addition of `rhs` (sparse and core). The core array grows to
+  * the length of rhs's when it is shorter.
+  */
+ FVector& FVector::operator+= (const FVector& rhs) {
+   const size_t rhsCore = rhs.m_coreFeatures.size();
+   if (m_coreFeatures.size() < rhsCore) {
+     resize(rhsCore);
+   }
+   for (const_iterator entry = rhs.cbegin(); entry != rhs.cend(); ++entry) {
+     set(entry->first, get(entry->first) + entry->second);
+   }
+   for (size_t k = 0; k < rhsCore; ++k) {
+     m_coreFeatures[k] += rhs.m_coreFeatures[k];
+   }
+   return *this;
+ }
+
+ /** Add the sparse features of `rhs` to this vector; core features are left alone. */
+ void FVector::sparsePlusEquals(const FVector& rhs) {
+   const_iterator entry = rhs.cbegin();
+   while (entry != rhs.cend()) {
+     set(entry->first, get(entry->first) + entry->second);
+     ++entry;
+   }
+ }
+
+ /**
+  * Copy the core features of `rhs` into this vector; sparse features are not
+  * touched. If rhs holds more core features than this vector, the core array
+  * is grown first so the copy cannot write past its end. Core positions
+  * beyond rhs's length keep their current values.
+  */
+ void FVector::coreAssign(const FVector& rhs) {
+   if (rhs.m_coreFeatures.size() > m_coreFeatures.size())
+     resize(rhs.m_coreFeatures.size());
+   for (size_t i = 0; i < rhs.m_coreFeatures.size(); ++i)
+     m_coreFeatures[i] = rhs.m_coreFeatures[i];
+ }
+
+ /** Call FName::incrementHopeId once for every sparse feature present here. */
+ void FVector::incrementSparseHopeFeatures() {
+   const_iterator entry = cbegin();
+   while (entry != cend()) {
+     FName::incrementHopeId((entry->first).name());
+     ++entry;
+   }
+ }
+
+ /** Call FName::incrementFearId once for every sparse feature present here. */
+ void FVector::incrementSparseFearFeatures() {
+   const_iterator entry = cbegin();
+   while (entry != cend()) {
+     FName::incrementFearId((entry->first).name());
+     ++entry;
+   }
+ }
+
+ /** Write "name: hopeCount" to `out`, one line per sparse feature. */
+ void FVector::printSparseHopeFeatureCounts(std::ofstream& out) {
+   for (const_iterator entry = cbegin(); entry != cend(); ++entry) {
+     const std::string& label = (entry->first).name();
+     out << label << ": " << FName::getHopeIdCount(label) << std::endl;
+   }
+ }
+
+ /** Write "name: fearCount" to `out`, one line per sparse feature. */
+ void FVector::printSparseFearFeatureCounts(std::ofstream& out) {
+   for (const_iterator entry = cbegin(); entry != cend(); ++entry) {
+     const std::string& label = (entry->first).name();
+     out << label << ": " << FName::getFearIdCount(label) << std::endl;
+   }
+ }
+
+ /** Write "name: hopeCount" to cerr, one line per sparse feature. */
+ void FVector::printSparseHopeFeatureCounts() {
+   for (const_iterator entry = cbegin(); entry != cend(); ++entry) {
+     const std::string& label = (entry->first).name();
+     std::cerr << label << ": " << FName::getHopeIdCount(label) << std::endl;
+   }
+ }
+
+ /** Write "name: fearCount" to cerr, one line per sparse feature. */
+ void FVector::printSparseFearFeatureCounts() {
+   for (const_iterator entry = cbegin(); entry != cend(); ++entry) {
+     const std::string& label = (entry->first).name();
+     std::cerr << label << ": " << FName::getFearIdCount(label) << std::endl;
+   }
+ }
+
+ /**
+  * Remove every sparse feature whose hope count and fear count are both below
+  * `threshold`. Each removal is logged to cerr and the feature's counters are
+  * erased through FName::eraseId.
+  *
+  * @return the number of features removed.
+  */
+ size_t FVector::pruneSparseFeatures(size_t threshold) {
+   vector<FName> doomed;
+   for (const_iterator entry = cbegin(); entry != cend(); ++entry) {
+     const std::string& label = (entry->first).name();
+     // Keep the feature as soon as either counter reaches the threshold.
+     if (FName::getHopeIdCount(label) >= threshold || FName::getFearIdCount(label) >= threshold) {
+       continue;
+     }
+     doomed.push_back(entry->first);
+     std::cerr << "pruning: " << label << " (" << FName::getHopeIdCount(label) << ", " << FName::getFearIdCount(label) << ")" << std::endl;
+     FName::eraseId(FName::getId(label));
+   }
+
+   // Erase after the scan so the container is not modified while iterating.
+   for (size_t k = 0; k < doomed.size(); ++k) {
+     m_features.erase(doomed[k]);
+   }
+   return doomed.size();
+ }
+
+ /**
+  * Remove every sparse feature whose value is exactly 0, erasing its counters
+  * through FName::eraseId as well.
+  *
+  * @return the number of features removed.
+  */
+ size_t FVector::pruneZeroWeightFeatures() {
+   vector<FName> doomed;
+   for (const_iterator entry = cbegin(); entry != cend(); ++entry) {
+     if (!(entry->second == 0)) {
+       continue;
+     }
+     const std::string& label = (entry->first).name();
+     doomed.push_back(entry->first);
+     FName::eraseId(FName::getId(label));
+   }
+
+   // Erase after the scan so the container is not modified while iterating.
+   for (size_t k = 0; k < doomed.size(); ++k) {
+     m_features.erase(doomed[k]);
+   }
+   return doomed.size();
+ }
+
+ /**
+  * Accumulate `weightUpdate` into this vector of counts.
+  * With `signedCounts` the update values are added as they are; otherwise
+  * their absolute values are added. Sparse entries of the update that are
+  * exactly 0 are skipped.
+  *
+  * The core array is grown to the update's length first, so the core loop
+  * cannot write past its end when the update carries more core features.
+  */
+ void FVector::updateConfidenceCounts(const FVector& weightUpdate, bool signedCounts) {
+   if (weightUpdate.m_coreFeatures.size() > m_coreFeatures.size())
+     resize(weightUpdate.m_coreFeatures.size());
+
+   for (size_t i = 0; i < weightUpdate.m_coreFeatures.size(); ++i) {
+     if (signedCounts)
+       m_coreFeatures[i] += weightUpdate.m_coreFeatures[i];
+     else
+       m_coreFeatures[i] += abs(weightUpdate.m_coreFeatures[i]);
+   }
+
+   for (const_iterator i = weightUpdate.cbegin(); i != weightUpdate.cend(); ++i) {
+     if (weightUpdate[i->first] == 0)
+       continue;
+     float value = get(i->first);
+     if (signedCounts)
+       value += weightUpdate[i->first];
+     else
+       value += abs(weightUpdate[i->first]);
+     set(i->first, value);
+   }
+ }
+
+ /**
+  * Recompute the per-feature rates stored in this vector from
+  * `confidenceCounts`:
+  *   rate = 1 / (1/r0 + decay * |count|)
+  * using (core_r0, decay_core) for core features and (sparse_r0, decay_sparse)
+  * for sparse ones. Only positions and names present in `confidenceCounts`
+  * are written.
+  *
+  * The core array is grown to the counts' length first, so the core loop
+  * cannot write past its end.
+  */
+ void FVector::updateLearningRates(float decay_core, float decay_sparse, const FVector &confidenceCounts, float core_r0, float sparse_r0) {
+   if (confidenceCounts.m_coreFeatures.size() > m_coreFeatures.size())
+     resize(confidenceCounts.m_coreFeatures.size());
+
+   for (size_t i = 0; i < confidenceCounts.m_coreFeatures.size(); ++i) {
+     m_coreFeatures[i] = 1.0/(1.0/core_r0 + decay_core * abs(confidenceCounts.m_coreFeatures[i]));
+   }
+
+   for (const_iterator i = confidenceCounts.cbegin(); i != confidenceCounts.cend(); ++i) {
+     float value = 1.0/(1.0/sparse_r0 + decay_sparse * abs(i->second));
+     set(i->first, value);
+   }
+ }
+
+ /**
+  * Mark which features occur in `rhs`: every sparse feature with a non-zero
+  * value in rhs is set to 1 here, and every core position that exists in rhs
+  * is set to 1 regardless of its value.
+  *
+  * The core array is grown to rhs's length first, so the core loop cannot
+  * write past its end.
+  */
+ void FVector::setToBinaryOf(const FVector& rhs) {
+   if (rhs.m_coreFeatures.size() > m_coreFeatures.size())
+     resize(rhs.m_coreFeatures.size());
+   for (const_iterator i = rhs.cbegin(); i != rhs.cend(); ++i)
+     if (rhs.get(i->first) != 0)
+       set(i->first, 1);
+   for (size_t i = 0; i < rhs.m_coreFeatures.size(); ++i)
+     m_coreFeatures[i] = 1;
+ }
+
+ /** Divide every core feature by `scalar`; sparse features are not touched. */
+ FVector& FVector::coreDivideEquals(float scalar) {
+   // Same scalar form that operator/=(const FValue&) applies to the core array.
+   m_coreFeatures /= scalar;
+   return *this;
+ }
+
+ /**
+  * Element-wise division by `rhs`, which holds the number of non-zero
+  * summands per feature (this vector being a sum of vectors). Only sparse
+  * names present in rhs are divided; the core arrays must have equal length
+  * (asserted).
+  */
+ FVector& FVector::divideEquals(const FVector& rhs) {
+   assert(m_coreFeatures.size() == rhs.m_coreFeatures.size());
+   const_iterator divisor = rhs.cbegin();
+   while (divisor != rhs.cend()) {
+     // divisor->second is rhs's value for this name, i.e. the summand count
+     set(divisor->first, get(divisor->first)/divisor->second);
+     ++divisor;
+   }
+   const size_t coreCount = rhs.m_coreFeatures.size();
+   for (size_t k = 0; k < coreCount; ++k) {
+     m_coreFeatures[k] /= rhs.m_coreFeatures[k];
+   }
+   return *this;
+ }
+
+ /**
+  * Element-wise subtraction of `rhs` (sparse and core). The core array grows
+  * to the length of rhs's when it is shorter.
+  */
+ FVector& FVector::operator-= (const FVector& rhs) {
+   const size_t rhsCore = rhs.m_coreFeatures.size();
+   if (m_coreFeatures.size() < rhsCore) {
+     resize(rhsCore);
+   }
+   for (const_iterator entry = rhs.cbegin(); entry != rhs.cend(); ++entry) {
+     set(entry->first, get(entry->first) -(entry->second));
+   }
+   // After the resize our core array covers every rhs position; positions
+   // beyond rhs's length have nothing subtracted.
+   for (size_t k = 0; k < rhsCore; ++k) {
+     m_coreFeatures[k] -= rhs.m_coreFeatures[k];
+   }
+   return *this;
+ }
+
+ /**
+  * Element-wise product with `rhs`. Sparse features already present here are
+  * multiplied by rhs's value for the same name (0 when rhs lacks it). Core
+  * positions that rhs does not have are set to 0.
+  */
+ FVector& FVector::operator*= (const FVector& rhs) {
+   const size_t rhsCore = rhs.m_coreFeatures.size();
+   if (rhsCore > m_coreFeatures.size()) {
+     resize(rhsCore);
+   }
+   for (iterator entry = begin(); entry != end(); ++entry) {
+     const FValue mine = entry->second;
+     const FValue theirs = rhs.get(entry->first);
+     entry->second = mine*theirs;
+   }
+   // Positions shared with rhs ...
+   for (size_t k = 0; k < rhsCore; ++k) {
+     m_coreFeatures[k] *= rhs.m_coreFeatures[k];
+   }
+   // ... and positions rhs lacks, treated as multiplied by zero.
+   for (size_t k = rhsCore; k < m_coreFeatures.size(); ++k) {
+     m_coreFeatures[k] = 0;
+   }
+   return *this;
+ }
+
+ /**
+  * Element-wise quotient by `rhs`. Sparse features already present here are
+  * divided by rhs's value for the same name (0 when rhs lacks it). Core
+  * positions that rhs does not have become +/- infinity according to their
+  * sign; a value of exactly 0 there is left as it is.
+  */
+ FVector& FVector::operator/= (const FVector& rhs) {
+   const size_t rhsCore = rhs.m_coreFeatures.size();
+   if (rhsCore > m_coreFeatures.size()) {
+     resize(rhsCore);
+   }
+   for (iterator entry = begin(); entry != end(); ++entry) {
+     const FValue numerator = entry->second;
+     const FValue denominator = rhs.get(entry->first);
+     entry->second = numerator / denominator;
+   }
+   // Positions shared with rhs.
+   for (size_t k = 0; k < rhsCore; ++k) {
+     m_coreFeatures[k] /= rhs.m_coreFeatures[k];
+   }
+   // Positions rhs lacks: stands in for a division by zero.
+   for (size_t k = rhsCore; k < m_coreFeatures.size(); ++k) {
+     if (m_coreFeatures[k] < 0) {
+       m_coreFeatures[k] = -numeric_limits<FValue>::infinity();
+     } else if (m_coreFeatures[k] > 0) {
+       m_coreFeatures[k] = numeric_limits<FValue>::infinity();
+     }
+   }
+   return *this;
+ }
+
+ /** Multiply every sparse and core feature by the scalar `rhs`. */
+ FVector& FVector::operator*= (const FValue& rhs) {
+   iterator entry = begin();
+   while (entry != end()) {
+     entry->second *= rhs;
+     ++entry;
+   }
+   m_coreFeatures *= rhs;
+   return *this;
+ }
+
+ /** Divide every sparse and core feature by the scalar `rhs`. */
+ FVector& FVector::operator/= (const FValue& rhs) {
+   iterator entry = begin();
+   while (entry != end()) {
+     entry->second /= rhs;
+     ++entry;
+   }
+   m_coreFeatures /= rhs;
+   return *this;
+ }
+
+ /**
+  * Element-wise product with `rhs`, as operator*=, except that a sparse
+  * feature missing from rhs is multiplied by `backoff` instead of 0.
+  * Core positions that rhs does not have are set to 0.
+  *
+  * The rhs lookup is done inline rather than through getBackoff(): that
+  * function returns a reference to its own by-value `backoff` parameter, which
+  * no longer exists once it has returned.
+  */
+ FVector& FVector::multiplyEqualsBackoff(const FVector& rhs, float backoff) {
+   if (rhs.m_coreFeatures.size() > m_coreFeatures.size()) {
+     resize(rhs.m_coreFeatures.size());
+   }
+   for (iterator i = begin(); i != end(); ++i) {
+     FValue lhsValue = i->second;
+     FValue rhsValue = backoff;
+     const_iterator ri = rhs.m_features.find(i->first);
+     if (ri != rhs.m_features.end()) {
+       rhsValue = ri->second;
+     }
+     set(i->first,lhsValue*rhsValue);
+   }
+   for (size_t i = 0; i < m_coreFeatures.size(); ++i) {
+     if (i < rhs.m_coreFeatures.size()) {
+       m_coreFeatures[i] *= rhs.m_coreFeatures[i];
+     } else {
+       m_coreFeatures[i] = 0;
+     }
+   }
+   return *this;
+ }
+
+ /** Scale core features by `core_r0` and sparse features by `sparse_r0`. */
+ FVector& FVector::multiplyEquals(float core_r0, float sparse_r0) {
+   const size_t coreCount = m_coreFeatures.size();
+   for (size_t k = 0; k < coreCount; ++k) {
+     m_coreFeatures[k] *= core_r0;
+   }
+   iterator entry = begin();
+   while (entry != end()) {
+     set(entry->first,(entry->second)*sparse_r0);
+     ++entry;
+   }
+   return *this;
+ }
+
+ /** Sum of absolute values over the sparse features, then the core features. */
+ FValue FVector::l1norm() const {
+   FValue total = 0;
+   const_iterator entry = cbegin();
+   while (entry != cend()) {
+     total += abs(entry->second);
+     ++entry;
+   }
+   const size_t coreCount = m_coreFeatures.size();
+   for (size_t k = 0; k < coreCount; ++k) {
+     total += abs(m_coreFeatures[k]);
+   }
+   return total;
+ }
+
+ /**
+  * Sum of absolute values of the core features, leaving out the last one
+  * (the Bleu score feature). Returns 0 when there are fewer than two core
+  * features.
+  */
+ FValue FVector::l1norm_coreFeatures() const {
+   FValue norm = 0;
+   // ignore Bleu score feature (last feature)
+   // "i + 1 < size()" rather than "i < size() - 1": the subtraction wraps
+   // around for an empty core array because size() is unsigned.
+   for (size_t i = 0; i + 1 < m_coreFeatures.size(); ++i)
+     norm += abs(m_coreFeatures[i]);
+   return norm;
+ }
+
+ /** Euclidean norm: square root of the inner product of the vector with itself. */
+ FValue FVector::l2norm() const {
+   const FValue squared = inner_product(*this);
+   return sqrt(squared);
+ }
+
+ /** Largest absolute value found among the sparse and core features (0 if none). */
+ FValue FVector::linfnorm() const {
+   FValue largest = 0;
+   for (const_iterator entry = cbegin(); entry != cend(); ++entry) {
+     const float magnitude = abs(entry->second);
+     if (largest < magnitude) {
+       largest = magnitude;
+     }
+   }
+   const size_t coreCount = m_coreFeatures.size();
+   for (size_t k = 0; k < coreCount; ++k) {
+     const float magnitude = abs(m_coreFeatures[k]);
+     if (largest < magnitude) {
+       largest = magnitude;
+     }
+   }
+   return largest;
+ }
+
+ /**
+  * L1 shrinkage by `lambda`: every value moves towards 0 by lambda and is
+  * clamped at 0 instead of changing sign. Sparse features that reach 0 are
+  * removed.
+  *
+  * @return the number of sparse features removed.
+  */
+ size_t FVector::l1regularize(float lambda) {
+   const size_t coreCount = m_coreFeatures.size();
+   for (size_t k = 0; k < coreCount; ++k) {
+     const float weight = m_coreFeatures[k];
+     m_coreFeatures[k] = (weight > 0) ? max(0.0f, weight - lambda)
+                                      : min(0.0f, weight + lambda);
+   }
+   // The sparse half is exactly what sparseL1regularize does.
+   return sparseL1regularize(lambda);
+ }
+
+ /** L2 shrinkage: multiply every core and sparse feature by (1 - lambda). */
+ void FVector::l2regularize(float lambda) {
+   const size_t coreCount = m_coreFeatures.size();
+   for (size_t k = 0; k < coreCount; ++k) {
+     m_coreFeatures[k] *= (1 - lambda);
+   }
+   // The sparse half is exactly what sparseL2regularize does.
+   sparseL2regularize(lambda);
+ }
+
+ /**
+  * L1 shrinkage of the sparse features only (core features are not touched):
+  * every non-zero value moves towards 0 by `lambda`, clamped at 0. Features
+  * that reach 0 are removed and their counters erased via FName::eraseId.
+  * Values that were already exactly 0 are left in place.
+  *
+  * @return the number of sparse features removed.
+  */
+ size_t FVector::sparseL1regularize(float lambda) {
+   const size_t sizeBefore = size();
+   vector<FName> vanished;
+   for (iterator entry = begin(); entry != end(); ++entry) {
+     const float current = entry->second;
+     if (current == 0.0f) {
+       continue;
+     }
+     const float shrunk = (current > 0) ? max(0.0f, current - lambda)
+                                        : min(0.0f, current + lambda);
+     if (shrunk != 0.0f) {
+       entry->second = shrunk;
+       continue;
+     }
+     vanished.push_back(entry->first);
+     const std::string& label = (entry->first).name();
+     FName::eraseId(FName::getId(label));
+   }
+
+   // erase features that have become zero
+   for (size_t k = 0; k < vanished.size(); ++k) {
+     m_features.erase(vanished[k]);
+   }
+   return sizeBefore - size();
+ }
+
+ /** L2 shrinkage of the sparse features only: multiply each by (1 - lambda). */
+ void FVector::sparseL2regularize(float lambda) {
+   iterator entry = begin();
+   while (entry != end()) {
+     entry->second *= (1 - lambda);
+     ++entry;
+   }
+ }
+
+ /** Sum of all sparse values plus the sum of the core array. */
+ FValue FVector::sum() const {
+   FValue total = 0;
+   const_iterator entry = cbegin();
+   while (entry != cend()) {
+     total += entry->second;
+     ++entry;
+   }
+   total += m_coreFeatures.sum();
+   return total;
+ }
+
+ /**
+  * Dot product with `rhs`: iterates this vector's sparse features (rhs
+  * supplies 0 for names it lacks), then the core features. Both core arrays
+  * must have the same length (CHECKed).
+  */
+ FValue FVector::inner_product(const FVector& rhs) const {
+   CHECK(m_coreFeatures.size() == rhs.m_coreFeatures.size());
+   FValue dot = 0.0;
+   const_iterator entry = cbegin();
+   while (entry != cend()) {
+     dot += ((entry->second)*(rhs.get(entry->first)));
+     ++entry;
+   }
+   const size_t coreCount = m_coreFeatures.size();
+   for (size_t k = 0; k < coreCount; ++k) {
+     dot += m_coreFeatures[k]*rhs.m_coreFeatures[k];
+   }
+   return dot;
+ }
+
+ /** Sum of two vectors: a copy of `lhs` with `rhs` added via operator+=. */
+ const FVector operator+(const FVector& lhs, const FVector& rhs) {
+   FVector result(lhs);
+   result += rhs;
+   return result;
+ }
+
+ /** Difference of two vectors: a copy of `lhs` with `rhs` subtracted via operator-=. */
+ const FVector operator-(const FVector& lhs, const FVector& rhs) {
+   FVector result(lhs);
+   result -= rhs;
+   return result;
+ }
+
+ /** Element-wise product: a copy of `lhs` multiplied by `rhs` via operator*=. */
+ const FVector operator*(const FVector& lhs, const FVector& rhs) {
+   FVector result(lhs);
+   result *= rhs;
+   return result;
+ }
+
+ /** Element-wise quotient: a copy of `lhs` divided by `rhs` via operator/=. */
+ const FVector operator/(const FVector& lhs, const FVector& rhs) {
+   FVector result(lhs);
+   result /= rhs;
+   return result;
+ }
+
+
+ /** Scalar product: a copy of `lhs` with every feature multiplied by `rhs`. */
+ const FVector operator*(const FVector& lhs, const FValue& rhs) {
+   FVector result(lhs);
+   result *= rhs;
+   return result;
+ }
+
+ /** Scalar quotient: a copy of `lhs` with every feature divided by `rhs`. */
+ const FVector operator/(const FVector& lhs, const FValue& rhs) {
+   FVector result(lhs);
+   result /= rhs;
+   return result;
+ }
+
+ /**
+  * Dot product of two vectors. The member inner_product is invoked on the
+  * operand whose size() is smaller, since it iterates the sparse entries of
+  * the object it is called on; on a tie it is invoked on `rhs`.
+  */
+ FValue inner_product(const FVector& lhs, const FVector& rhs) {
+   const bool iterateRhs = lhs.size() >= rhs.size();
+   return iterateRhs ? rhs.inner_product(lhs) : lhs.inner_product(rhs);
+ }
+}