// Copyright 2008 Abby Levenberg, David Talbot // // This file is part of RandLM // // RandLM is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // RandLM is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with RandLM. If not, see . #ifndef INC_RANDLM_CACHE_H #define INC_RANDLM_CACHE_H #include #include #include #include namespace randlm { template class CacheNode { public: typedef std::map* > childMap; // initialise value to 'unknown' (i.e. not yet queried or cached). CacheNode(T unknown_value) : value_(unknown_value) {} childMap childs_; // child pointers T value_; // value stored const void* state_; // state pointer }; template class Cache { public: typedef typename std::map* >::iterator childPtr; // unknown_value is used to indicate the ngram was not queried (yet) // null_value_ indicates it was queried but not found in model // space usage is handled by client. Cache(T unknown_value, T null_value) : cur_nodes_(0), unknown_value_(unknown_value), null_value_(null_value) { root_ = newNode(); } ~Cache() { if(clear()) { delete root_; root_ = NULL; } else { std::cerr << "Error freeing cache memory.\n"; } } bool setCacheNgram(const wordID_t* ngram, int len, T value, const void* state) { // inserts full ngram into cache CacheNode* node = root_; for (int i = len - 1; i > -1; --i) { childPtr child = node->childs_.find(ngram[i]); if( child != node->childs_.end() ) { // current node is already prefix. Go to child node node = node->childs_[ngram[i]]; } else { // no child for prefix. set new child link in current node CacheNode * newChild = newNode(node); node->childs_[ngram[i]] = newChild; // go to new node node = newChild; } } node->value_ = value; node->state_ = state; return true; } bool checkCacheNgram(const wordID_t* ngram, int len, T* value, const void** state) { // finds value for this full ngram only (returns false if full ngram not in cache) CacheNode * node = root_; for(int i = len - 1; i > -1; --i) { // go to deepest level node of ngram in cache childPtr child = node->childs_.find(ngram[i]); if( child != node->childs_.end() ) { // switch to child node node = node->childs_[ngram[i]]; } else { // not cached return false; } } *value = node->value_; if(state) *state = node->state_; return *value != null_value_ && *value != unknown_value_; } int getCache2(const wordID_t* ngram, int len, T** values, int* found) { // set values array to point to cache value nodes CacheNode * node = root_; *found = 0; //values[0] = &node->value_; // pointer to root node's value bool all_found = true; for(int i = len - 1; i > -1; --i) { // go to deepest level node of ngram in cache childPtr child = node->childs_.find(ngram[i]); if( child != node->childs_.end() ) { // switch to child node node = node->childs_[ngram[i]]; // get pointer to value (index by length - 1) values[i] = &node->value_; // if null_value then assume all extensions impossible if (node->value_ == null_value_) { return len - 1 - i; // max length posible } all_found = all_found && (node->value_ != unknown_value_); if (all_found) ++(*found); } else { // initialise uncached values CacheNode * newChild = newNode(node); node->childs_[ngram[i]] = newChild; // go to new node node = newChild; values[i] = &node->value_; } } return len; // all possible } int getCache(const wordID_t* ngram, int len, T** values, int* found) { // get pointers to values for ngram and constituents. // returns upper bound on longest subngram in model. // 'found' stores longest non-null and known value found. CacheNode * node = root_; *found = 0; values[0] = &node->value_; // pointer to root node's value bool all_found = true; for(int i = len - 1; i > -1; --i) { // go to deepest level node of ngram in cache childPtr child = node->childs_.find(ngram[i]); if( child != node->childs_.end() ) { // switch to child node node = node->childs_[ngram[i]]; // get pointer to value (index by length - 1) values[len - i] = &node->value_; // if null_value then assume all extensions impossible if (node->value_ == null_value_) return len - 1 - i; // max length posible all_found = all_found && (node->value_ != unknown_value_); if (all_found) ++(*found); } else { // initialise uncached values CacheNode * newChild = newNode(node); node->childs_[ngram[i]] = newChild; // go to new node node = newChild; values[len - i] = &node->value_; } } return len; // all possible } bool clear() { std::cerr << "Clearing cache with " << static_cast(cur_nodes_ * nodeSize()) / static_cast(1ull << 20) << "MB" << std::endl; return clearNodes(root_); } int nodes() { // returns number of nodes return cur_nodes_; } int nodeSize() { return sizeof(CacheNode) + sizeof(root_->childs_); } private: CacheNode * root_; count_t cur_nodes_; T unknown_value_; // Used to initialise data at each node T null_value_; // Indicates cached something not in model CacheNode* newNode(CacheNode * node = 0) { ++cur_nodes_; return new CacheNode(unknown_value_); } bool clearNodes(CacheNode * node) { //delete children from this node if(!node->childs_.empty()) { iterate(node->childs_, itr) { if(!clearNodes(itr->second)) std::cerr << "Error emptying cache\n"; delete itr->second; --cur_nodes_; } node->childs_.clear(); } return true; } }; } //end namespace #endif //INC_RANDLM_CACHE_H