Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
Diffstat (limited to 'scripts/training/eppex/LossyCounter.h')
-rw-r--r--scripts/training/eppex/LossyCounter.h388
1 files changed, 0 insertions, 388 deletions
diff --git a/scripts/training/eppex/LossyCounter.h b/scripts/training/eppex/LossyCounter.h
deleted file mode 100644
index 57cce079d..000000000
--- a/scripts/training/eppex/LossyCounter.h
+++ /dev/null
@@ -1,388 +0,0 @@
-/**
- * LossyCounter - implementation of Lossy Counting algorithm as described in:
- * Approximate Frequency Counts over Data Streams, G.S.Manku & R.Motwani, (2002)
- *
- * (C) Ceslav Przywara, UFAL MFF UK, 2011
- *
- * Implementation note: define USE_UNORDERED_MAP to use std::tr1::unordered_map
- * instead std::map for storage of lossy counted items.
- *
- * $Id$
- */
-
-#ifndef LOSSYCOUNTER_H
-#define LOSSYCOUNTER_H
-
-#include <stddef.h>
-#include <math.h>
-#ifdef USE_UNORDERED_MAP
-#include <tr1/unordered_map>
-#else
-#include <map>
-#endif
-#include <iterator>
-
-
-// Iterators:
-template<class T>
-class LossyCounterIterator;
-
-template<class T>
-class LossyCounterErasingIterator;
-
-
-////////////////////////////////////////////////////////////////////////////////
-/////////////////////////// Lossy Counter Interface ////////////////////////////
-////////////////////////////////////////////////////////////////////////////////
-
-/**
- * Implementation of Lossy Counting algorithm as described in:
- * Approximate Frequency Counts over Data Streams, G.S.Manku & R.Motwani, (2002)
- */
-template<class T>
-class LossyCounter {
-
-public:
-
- // Error parameter type definition.
- typedef double error_t;
-
- // Support parameter type definition.
- typedef double support_t;
-
- // Counters type definition.
- typedef size_t counter_t;
-
- // Frequency counter type definition (f).
- typedef counter_t frequency_t;
-
- // Maximum error counter type definition (Δ).
- typedef counter_t maximum_error_t;
-
- // Pair: frequency (f) and possible maximum error (Δ).
- typedef std::pair<frequency_t, maximum_error_t> item_counts_t;
-#ifdef USE_UNORDERED_MAP
- typedef std::tr1::unordered_map<T, item_counts_t, typename T::Hash> storage_t;
-#else
- // Mapping: counted item and its frequency and max. error counts.
- typedef std::map<T, item_counts_t> storage_t;
-#endif
- // We provide own version of const iterator.
- typedef LossyCounterIterator<T> const_iterator;
-
- // Special type of iterator: leaves no item behind!
- typedef LossyCounterErasingIterator<T> erasing_iterator;
-
- /** @var Error parameter value (ε) */
- const error_t error;
-
- /** @var Supprort parameter value (s) */
- const support_t support;
-
- /** @var Width of single bucket (w) */
- const counter_t bucketWidth; // ceil(1/error)
-
-private:
-
- /** @var Current epoch bucket ID (b-current) */
- counter_t _bucketId;
-
- /** @var Count of items read in so far (N) */
- counter_t _count;
-
- /** @var Items storage (Ɗ) */
- storage_t _storage;
-
-public:
-
- /**
- * Set error to 0 to disable lossy-pruning.
- * @param _error Value from interval [0.0, 1.0).
- * @param _support Value from interval [0.0, 1.0).
- */
- LossyCounter<T>(error_t _error, support_t _support):
- error(_error), support(_support), bucketWidth(_error > 0.0 ? ceil(1/_error) : 0), _bucketId(1), _count(0), _storage() {}
-
- /**
- * @param item Item to be added to storage.
- */
- void add(const T& item);
-
- /**
- * @return Constant iterator pointing to the beginning of storage.
- */
- const_iterator begin(void) const { return LossyCounterIterator<T>(threshold(), _storage.begin(), _storage.end()); }
-
- /**
- * @return Constant iterator pointing to the end of storage.
- */
- const_iterator end(void) const { return LossyCounterIterator<T>(_storage.end()); }
-
- /**
- * @return Erasing iterator pointing to the beginning of storage.
- */
- erasing_iterator beginErase(void) { return LossyCounterErasingIterator<T>(threshold(), _storage); }
-
- /**
- * @return Erasing iterator pointing to the end of storage.
- */
- erasing_iterator endErase(void) { return LossyCounterErasingIterator<T>(_storage); }
-
- /**
- * @return Current bucket ID.
- */
- counter_t bucketId(void) const { return _bucketId; }
-
- /**
- * @return Number of items added to storage so far (N).
- */
- counter_t count(void) const { return _count; }
-
- /**
- * @return Number of items currently in storage.
- */
- size_t size(void) const { return _storage.size(); }
-
- /**
- * @param positive Return sN value instead of (s-ε)N?
- * @return Threshold (either positive or negative) value.
- */
- double threshold(bool positive = false) const { return positive ? support * _count : (support - error) * _count; }
-
- /**
- * @return The maximum value of which estimated frequencies are less than the true frequencies.
- */
- double maxError(void) const { return error * _count; }
-
- /**
- * @return True, if it's prunning time right now!
- */
- bool aboutToPrune(void) const { return (bucketWidth != 0) && ((_count % bucketWidth) == 0); }
-
-private:
-
- /**
- * Prunes counts table.
- */
- void prune(void);
-
-};
-
-
-////////////////////////////////////////////////////////////////////////////////
-/////////////////////// Lossy Counter Iterator Interface ///////////////////////
-////////////////////////////////////////////////////////////////////////////////
-
-/**
- * Lossy counter iterator is designed to iterate over items passing the current
- * threshold.
- */
-template<class T>
-class LossyCounterIterator: public std::iterator<std::forward_iterator_tag, typename LossyCounter<T>::storage_t::value_type> {
-public:
-
- typedef LossyCounterIterator<T> self_type;
-
- typedef typename LossyCounter<T>::storage_t::const_iterator const_iterator;
-
-protected:
-
- /** @var Minimum frequency threshold */
- const double _threshold;
-
- /** @var Current position of iterator (based on underlying container) */
- const_iterator _current;
-
- /** @var End position in underlying container */
- const const_iterator _end;
-
-public:
-
- // Constructors.
-
- LossyCounterIterator<T>(const_iterator end):
- _threshold(0), _current(_end), _end(end) {}
-
- LossyCounterIterator<T>(double threshold, const_iterator begin, const_iterator end):
- _threshold(threshold), _current(begin), _end(end) {
- // Forward the iterator to the first valid item (with frequency above threshold)!
- this->forward(true);
- }
-
- // Operators.
-
- /**
- * Only items passing the threshold are included in iteration.
- */
- LossyCounterIterator<T> operator++(void); // ++this
-
- /**
- * Only items passing the threshold are included in iteration.
- */
- LossyCounterIterator<T> operator++(int junk); // this++
-
- bool operator==(const self_type& rhs) const { return _current == rhs._current; }
-
- bool operator!=(const self_type& rhs) const { return _current != rhs._current; }
-
- // Interface.
-
- /**
- * @return Current item.
- */
- const T& item(void) const { return _current->first; }
-
- /**
- * @return Current item's frequency.
- */
- typename LossyCounter<T>::frequency_t frequency(void) const { return _current->second.first; }
-
- /**
- * @return Current item's maximum error.
- */
- typename LossyCounter<T>::error_t max_error(void) const { return _current->second.second; }
-
-protected:
-
- /**
- * @param init Check also the item that iterator _current points to? Useful when initializing.
- */
- void forward(bool init);
-
-};
-
-/**
- * Lossy counter iterator erasing all items passed by.
- */
-template<class T>
-class LossyCounterErasingIterator: public LossyCounterIterator<T> {
-public:
-
- typedef typename LossyCounter<T>::storage_t storage_t;
-
-private:
-
- storage_t& _storage;
-
-public:
-
- // Constructors - have to be aware of the "mother" container.
-
- LossyCounterErasingIterator<T>(storage_t& storage): LossyCounterIterator<T>(storage.end()), _storage(storage) {}
-
- LossyCounterErasingIterator<T>(double threshold, storage_t& storage): LossyCounterIterator<T>(threshold, storage.begin(), storage.end()), _storage(storage) {}
-
-protected:
-
- /**
- * @param init Check also the item that iterator _current points to? Useful when initializing.
- */
- void forward(bool init);
-
-};
-
-
-////////////////////////////////////////////////////////////////////////////////
-//////////////////////// Lossy Counter Implementation //////////////////////////
-////////////////////////////////////////////////////////////////////////////////
-
-template<class T>
-void LossyCounter<T>::add(const T& item) {
-
- typename storage_t::iterator iter = _storage.find(item);
-
- if ( iter == _storage.end() ) {
- // Insert new item with appropriate frequency and maximum-possible-error.
- _storage.insert(std::make_pair(item, item_counts_t(1, _bucketId - 1)));
- }
- else {
- // Update frequency of existing.
- iter->second.first += 1;
- }
-
- // Finally increment the counter and check if the table shall be pruned.
- ++_count;
- if ( this->aboutToPrune() ) {
- this->prune();
- ++_bucketId;
- }
-}
-
-
-template<class T>
-void LossyCounter<T>::prune(void) {
-
- for ( typename storage_t::iterator iter = _storage.begin(); iter != _storage.end(); /* Incrementation step missing intentionally! */ ) {
- // Prune, if: maximum possible error + frequency <= ID of current bucket
- if ( (iter->second.first + iter->second.second) <= _bucketId ) {
- _storage.erase(iter++); // Post increment!
- }
- else {
- ++iter;
- }
- }
-
-}
-
-
-////////////////////////////////////////////////////////////////////////////////
-//////////////////// Lossy Counter Iterator Implementation /////////////////////
-////////////////////////////////////////////////////////////////////////////////
-
-template<class T>
-LossyCounterIterator<T> LossyCounterIterator<T>::operator++(void) {
- this->forward();
- return *this;
-}
-
-
-template<class T>
-LossyCounterIterator<T> LossyCounterIterator<T>::operator++(int junk) {
- self_type iter = *this;
- this->forward();
- return iter;
-}
-
-template<class T>
-void LossyCounterIterator<T>::forward(bool init = false) {
- if ( _current == _end ) {
- return; // Nowhere to go, we're at the end already.
- }
- if ( init && (this->frequency() >= _threshold) ) {
- // Ok, we're initing and we're already at the item passing the threshold.
- return;
- }
- // Keep going until we reach the end or the next item with frequency NOT below the threshold.
- while ( (++_current != _end) && (this->frequency() < _threshold) );
-}
-
-
-////////////////////////////////////////////////////////////////////////////////
-//////////////// Lossy Counter Erasing Iterator Implementation /////////////////
-////////////////////////////////////////////////////////////////////////////////
-
-template<class T>
-void LossyCounterErasingIterator<T>::forward(bool init = false) {
- if (this->_current == this->_end ) {
- return; // Nowhere to go, we're at the end already.
- }
- if ( init && (this->frequency() >= this->_threshold) ) {
- // Ok, we're initing and we're already at the item passing the threshold.
- return;
- }
- // This is where erasing forward() and std forward() differ:
- typename LossyCounterIterator<T>::const_iterator previous = this->_current;
- // Keep going...
- while ( ++this->_current != this->_end ) {
- if ( this->frequency() < this->_threshold ) {
- // Get rid of previous, cause we'll go on.
- _storage.erase(previous);
- previous = this->_current;
- } else {
- break;
- }
- }
- _storage.erase(previous); // !
-}
-
-#endif /* LOSSYCOUNTER_H */