// -*- c++ -*- // (c) 2007-2012 Ulrich Germann #ifndef __ug_mm_2d_table_h #define __ug_mm_2d_table_h #include #include #include #include #include "tpt_typedefs.h" #include "tpt_pickler.h" #include "ug_typedefs.h" #include "util/exception.hh" namespace bio=boost::iostreams; namespace ugdiss { using namespace std; template class mm2dTable { public: struct Cell { ID id; VAL val; bool operator<(ID const otherId) const { return id < otherId; } bool operator<(Cell const& other) const { return id < other.id; } struct SortDescendingByValue { bool operator()(Cell const& a, Cell const& b) const { return a.val > b.val; } }; }; struct Row { Cell const* start; Cell const* stop; VAL operator[](ID key) const; }; Cell const* data; VAL const* M1; VAL const* M2; OFFSET const* index; ID numRows; ID numCols; boost::shared_ptr file; VAL m1(ID key) const { return (key < numRows) ? M1[key] : INIT(0); } VAL m2(ID key) const { return (key < numCols) ? M2[key] : INIT(0); } void open(string fname); void close(); Row operator[](ID key) const; mm2dTable(string const fname="") { if (!fname.empty()) open(fname); }; ~mm2dTable() { file.reset(); }; }; template typename mm2dTable::Row mm2dTable:: operator[](ID key) const { Row ret; if (key < numRows) { ret.start = data+index[key]; ret.stop = data+index[key+1]; } else ret.start = ret.stop = data+index[key+1]; return ret; } template VAL mm2dTable:: Row:: operator[](ID key) const { if (start==stop) return INIT(0); Cell const* c = lower_bound(start,stop,key); return (c != stop && c->id == key ? c->val : INIT(0)); } template void mm2dTable:: open(string fname) { // cout << "opening " << fname << " at " << __FILE__ << ":" << __LINE__ << endl; if (access(fname.c_str(),R_OK)) { ostringstream msg; msg << "[" << __FILE__ << ":" << __LINE__ <<"] FATAL ERROR: " << "file '" << fname << " is not accessible." << endl; string foo = msg.str(); UTIL_THROW(util::Exception,foo.c_str()); } file.reset(new bio::mapped_file_source()); file->open(fname); if (!file->is_open()) { ostringstream msg; msg << "[" << __FILE__ << ":" << __LINE__ <<"] FATAL ERROR: " << "Opening file '" << fname << "' failed." << endl; string foo = msg.str(); UTIL_THROW(util::Exception,foo.c_str()); } char const* p = file->data(); filepos_type offset = *reinterpret_cast(p); index = reinterpret_cast(p+offset); p += sizeof(offset); numRows = *reinterpret_cast(p); p += sizeof(id_type); numCols = *reinterpret_cast(p); p += sizeof(id_type); data = reinterpret_cast(p); // cout << numRows << " rows; " << numCols << " columns " << endl; M1 = reinterpret_cast(index+numRows+1); M2 = M1+numRows; // cout << "Table " << fname << " has " << numRows << " rows and " // << numCols << " columns." << endl; // cout << "File size is " << file.size()*1024 << " bytes; "; // cout << "M2 starts " << (reinterpret_cast(M2) - file.data()) // << " bytes into the file" << endl; // cout << M2[0] << endl; } template< typename OFFSET, // integer type of file offsets typename ID, // integer type of column ids typename VAL, // type of cell values typename INIT, // INIT(0) initializes default values typename ICONT // inner container type > void write_mm_2d_table(ostream& out, vector const& T, vector const* m1 = NULL, vector const* m2 = NULL) { assert(T.size()); typedef typename ICONT::const_iterator iter; // compute marginals if necessary vector m1x,m2x; if (!m1) { m1x.resize(T.size(),INIT(0)); for (size_t r = 0; r < T.size(); ++r) for (iter c = T.at(r).begin(); c != T.at(r).end(); ++c) m1x[r] = m1x[r] + c->second; m1 = &m1x; } if (!m2) { for (size_t r = 0; r < T.size(); ++r) for (iter c = T.at(r).begin(); c != T.at(r).end(); ++c) { while (c->first >= m2x.size()) m2x.push_back(INIT(0)); m2x[c->first] = m2x[c->first] + c->second; } m2 = &m2x; } filepos_type idxOffset=0; numwrite(out,idxOffset); // place holder, we'll return here at the end numwrite(out,id_type(m1->size())); // number of rows numwrite(out,id_type(m2->size())); // number of columns // write actual table vector index; size_t ctr =0; index.reserve(m1->size()+1); for (ID r = 0; r < ID(T.size()); ++r) { //index.push_back(out.tellp()); index.push_back(ctr); ID lastId = 0; if (T.at(r).size()) lastId = T.at(r).begin()->first; for (typename ICONT::const_iterator c = T.at(r).begin(); c != T.at(r).end(); ++c) { ctr++; assert(c->first >= lastId); lastId = c->first; typename mm2dTable::Cell item; item.id = c->first; item.val = c->second; out.write(reinterpret_cast(&item),sizeof(item)); } } // index.push_back(out.tellp()); index.push_back(ctr); idxOffset=out.tellp(); // write index for (size_t i = 0; i < index.size(); ++i) { OFFSET o = index[i]; // (index[i]-index[0])/sizeof(VAL); out.write(reinterpret_cast(&o),sizeof(OFFSET)); } // write marginals out.write(reinterpret_cast(&(*m1)[0]),m1->size()*sizeof(VAL)); out.write(reinterpret_cast(&(*m2)[0]),m2->size()*sizeof(VAL)); out.seekp(0); numwrite(out,idxOffset); } } #endif