// Source: misc/merge-sorted.cc from github.com/moses-smt/mosesdecoder.git
// (blob ae693215bed424188036b9b6b76e07363c8e554a), retrieved via the mirror
// list hosted at ThFree Co, Russian Federation.
// This program takes gzipped sorted files and merges them in sorted order
// to stdout. Written by Ulrich Germann
#include <algorithm>
#include <cassert>
#include <functional>
#include <iostream>
#include <string>
#include <vector>
#include <boost/iostreams/filtering_stream.hpp>
#include <boost/shared_ptr.hpp>
#include "moses/TranslationModel/UG/generic/file_io/ug_stream.h"
using namespace std;
using namespace ugdiss;
using namespace boost::iostreams;

typedef boost::shared_ptr<filtering_istream> fptr;

class Part
{
  string       fname;
  fptr             f;
  string my_lines[2];
  size_t         ctr;
public:
  string const& line() const 
  { 
    static string empty_line;
    return f ? my_lines[ctr%2] : empty_line; 
  }

  Part(string _fname) : ctr(0)
  {
    fname = _fname;
    f.reset(open_input_stream(fname));
    if (!getline(*f, my_lines[0])) f.reset();
  }

  bool next() 
  {
    if (!f) return false;
    if (!getline(*f, my_lines[++ctr%2]))
      {
        f.reset();
        --ctr;
        return false;
      }
     assert(my_lines[(ctr-1)%2] <= my_lines[ctr%2]);
    return true;
  }

  bool operator <(Part const& other) const 
  { return line() < other.line(); }

  bool operator <=(Part const& other) const 
  { return line() <= other.line(); }

  bool operator >(Part const& other) const 
  { return line() > other.line(); }

  bool operator >=(Part const& other) const 
  { return line() >= other.line(); }

  bool go(ostream& out)
  {
    if (!f) return false;
#if 0
    if (ctr)
      {
        out << fname << "-" << ctr - 1 << "-";
        out << my_lines[(ctr - 1)%2] << endl;
      }
    do 
      {
        out << fname << " " << ctr << " ";
        out << line() << "\n";
      }
    while (next() && my_lines[0] == my_lines[1]);
#else
    do    { out << line() << "\n"; } 
    while (next() && my_lines[0] == my_lines[1]);
    out.flush();
#endif
    return f != NULL;
  }
  
};


int main(int argc, char* argv[])
{
  vector<Part> parts;
  for (int i = 1; i < argc; ++i)
    parts.push_back(Part(argv[i]));
  make_heap(parts.begin(), parts.end(), greater<Part>());
  while (parts.size())
    {
      pop_heap(parts.begin(), parts.end(), greater<Part>());
      if (parts.back().go(cout))
        push_heap(parts.begin(), parts.end(), greater<Part>());
      else parts.pop_back();
    }
}