Welcome to mirror list, hosted at ThFree Co, Russian Federation.

shannon_entropy.h « entropy « compression « draco « src « draco « draco « extern - git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
blob: 85165f4cb8da9ec684725227dc8a6d60f0be7a5e (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
// Copyright 2016 The Draco Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#ifndef DRACO_COMPRESSION_ENTROPY_SHANNON_ENTROPY_H_
#define DRACO_COMPRESSION_ENTROPY_SHANNON_ENTROPY_H_

#include <stdint.h>

#include <vector>

namespace draco {

// Computes an approximate Shannon entropy of symbols stored in the provided
// input array |symbols|. The entropy corresponds to the number of bits that is
// required to represent/store all the symbols using an optimal entropy coding
// algorithm. See for example "A mathematical theory of communication" by
// Shannon'48 (http://ieeexplore.ieee.org/document/6773024/).
//
// |max_value| is a required input that define the maximum value in the input
// |symbols| array.
//
// |out_num_unique_symbols| is an optional output argument that stores the
// number of unique symbols contained within the |symbols| array.
// TODO(ostava): This should be renamed or the return value should be changed to
// return the actual entropy and not the number of bits needed to represent the
// input symbols.
int64_t ComputeShannonEntropy(const uint32_t *symbols, int num_symbols,
                              int max_value, int *out_num_unique_symbols);

// Computes the Shannon entropy of |num_values| Boolean entries, where
// |num_true_values| are set to true.
// Returns entropy between 0-1.
double ComputeBinaryShannonEntropy(uint32_t num_values,
                                   uint32_t num_true_values);

// Class that can be used to keep track of the Shannon entropy on streamed data.
// As new symbols are pushed to the tracker, the entropy is automatically
// recomputed. The class also support recomputing the entropy without actually
// pushing the symbols to the tracker through the Peek() method.
class ShannonEntropyTracker {
 public:
  ShannonEntropyTracker();

  // Struct for holding entropy data about the symbols added to the tracker.
  // It can be used to compute the number of bits needed to store the data using
  // the method:
  //   ShannonEntropyTracker::GetNumberOfDataBits(entropy_data);
  // or to compute the approximate size of the frequency table needed by the
  // rans coding using method:
  //   ShannonEntropyTracker::GetNumberOfRAnsTableBits(entropy_data);
  struct EntropyData {
    double entropy_norm;
    int num_values;
    int max_symbol;
    int num_unique_symbols;
    EntropyData()
        : entropy_norm(0.0),
          num_values(0),
          max_symbol(0),
          num_unique_symbols(0) {}
  };

  // Adds new symbols to the tracker and recomputes the entropy accordingly.
  EntropyData Push(const uint32_t *symbols, int num_symbols);

  // Returns new entropy data for the tracker as if |symbols| were added to the
  // tracker without actually changing the status of the tracker.
  EntropyData Peek(const uint32_t *symbols, int num_symbols);

  // Gets the number of bits needed for encoding symbols added to the tracker.
  int64_t GetNumberOfDataBits() const {
    return GetNumberOfDataBits(entropy_data_);
  }

  // Gets the number of bits needed for encoding frequency table using the rans
  // encoder.
  int64_t GetNumberOfRAnsTableBits() const {
    return GetNumberOfRAnsTableBits(entropy_data_);
  }

  // Gets the number of bits needed for encoding given |entropy_data|.
  static int64_t GetNumberOfDataBits(const EntropyData &entropy_data);

  // Gets the number of bits needed for encoding frequency table using the rans
  // encoder for the given |entropy_data|.
  static int64_t GetNumberOfRAnsTableBits(const EntropyData &entropy_data);

 private:
  EntropyData UpdateSymbols(const uint32_t *symbols, int num_symbols,
                            bool push_changes);

  std::vector<int32_t> frequencies_;

  EntropyData entropy_data_;
};

}  // namespace draco

#endif  // DRACO_COMPRESSION_ENTROPY_SHANNON_ENTROPY_H_