Welcome to mirror list, hosted at ThFree Co, Russian Federation.

ug_bitext_pstats.h « mm « UG « TranslationModel « moses - github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
blob: e4481ee52f0593399328d08d13b0883a05adbbb8 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
// -*- mode: c++; indent-tabs-mode: nil; tab-width:2  -*-
#pragma once

#include <boost/thread.hpp>
#include <boost/unordered_map.hpp>

#include "ug_typedefs.h"
#include "ug_bitext_jstats.h"
#include "moses/thread_safe_container.h"

namespace sapt
{
  struct
  pstats
  {
    typedef boost::unordered_map<uint64_t, SPTR<pstats> > map_t;
    typedef Moses::ThreadSafeContainer<uint64_t, SPTR<pstats>, map_t> cache_t;
    typedef std::vector<unsigned char> alnvec;
    typedef boost::unordered_map<uint64_t, jstats> trg_map_t;
    typedef boost::unordered_map<uint32_t,uint32_t> indoc_map_t; 
#if UG_BITEXT_TRACK_ACTIVE_THREADS
    static ThreadSafeCounter active;
#endif
    mutable boost::mutex lock;               // for parallel gathering of stats
    mutable boost::condition_variable ready; // consumers can wait for me to be ready

    size_t raw_cnt;     // (approximate) raw occurrence count
    size_t sample_cnt;  // number of instances selected during sampling
    size_t good;        // number of selected instances with valid word alignments
    size_t sum_pairs;   // total number of target phrases extracted (can be > raw_cnt)
    size_t in_progress; // how many threads are currently working on this?

    uint32_t ofwd[LRModel::NONE+1]; // distribution of fwd phrase orientations
    uint32_t obwd[LRModel::NONE+1]; // distribution of bwd phrase orientations

    indoc_map_t indoc;
    trg_map_t trg;
    pstats();
    ~pstats();
    void release();
    void register_worker();
    size_t count_workers() { return in_progress; }

    size_t
    add(uint64_t const  pid, // target phrase id
        float const       w, // sample weight (1./(# of phrases extractable))
        float const       b, // sample bias score
        alnvec const&     a, // local alignment
        uint32_t const cnt2, // raw target phrase count
        uint32_t fwd_o,      // fwd. phrase orientation
        uint32_t bwd_o,      // bwd. phrase orientation
        int const docid,     // document where sample was found
        int const sid);      // index of sentence where sample was found
    
    void
    count_sample(int const docid,        // document where sample was found
		 size_t const num_pairs, // # of phrases extractable here
		 int const po_fwd,       // fwd phrase orientation
		 int const po_bwd);      // bwd phrase orientation
    void wait() const;
  };

}