1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
|
// Copyright 2016 The Draco Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#ifndef DRACO_COMPRESSION_ENTROPY_SHANNON_ENTROPY_H_
#define DRACO_COMPRESSION_ENTROPY_SHANNON_ENTROPY_H_
#include <stdint.h>
#include <vector>
namespace draco {
// Computes an approximate Shannon entropy of symbols stored in the provided
// input array |symbols|. The entropy corresponds to the number of bits that is
// required to represent/store all the symbols using an optimal entropy coding
// algorithm. See for example "A mathematical theory of communication" by
// Shannon'48 (http://ieeexplore.ieee.org/document/6773024/).
//
// |max_value| is a required input that define the maximum value in the input
// |symbols| array.
//
// |out_num_unique_symbols| is an optional output argument that stores the
// number of unique symbols contained within the |symbols| array.
// TODO(ostava): This should be renamed or the return value should be changed to
// return the actual entropy and not the number of bits needed to represent the
// input symbols.
int64_t ComputeShannonEntropy(const uint32_t *symbols, int num_symbols,
int max_value, int *out_num_unique_symbols);
// Computes the Shannon entropy of |num_values| Boolean entries, where
// |num_true_values| are set to true.
// Returns entropy between 0-1.
double ComputeBinaryShannonEntropy(uint32_t num_values,
uint32_t num_true_values);
// Class that can be used to keep track of the Shannon entropy on streamed data.
// As new symbols are pushed to the tracker, the entropy is automatically
// recomputed. The class also support recomputing the entropy without actually
// pushing the symbols to the tracker through the Peek() method.
class ShannonEntropyTracker {
public:
ShannonEntropyTracker();
// Struct for holding entropy data about the symbols added to the tracker.
// It can be used to compute the number of bits needed to store the data using
// the method:
// ShannonEntropyTracker::GetNumberOfDataBits(entropy_data);
// or to compute the approximate size of the frequency table needed by the
// rans coding using method:
// ShannonEntropyTracker::GetNumberOfRAnsTableBits(entropy_data);
struct EntropyData {
double entropy_norm;
int num_values;
int max_symbol;
int num_unique_symbols;
EntropyData()
: entropy_norm(0.0),
num_values(0),
max_symbol(0),
num_unique_symbols(0) {}
};
// Adds new symbols to the tracker and recomputes the entropy accordingly.
EntropyData Push(const uint32_t *symbols, int num_symbols);
// Returns new entropy data for the tracker as if |symbols| were added to the
// tracker without actually changing the status of the tracker.
EntropyData Peek(const uint32_t *symbols, int num_symbols);
// Gets the number of bits needed for encoding symbols added to the tracker.
int64_t GetNumberOfDataBits() const {
return GetNumberOfDataBits(entropy_data_);
}
// Gets the number of bits needed for encoding frequency table using the rans
// encoder.
int64_t GetNumberOfRAnsTableBits() const {
return GetNumberOfRAnsTableBits(entropy_data_);
}
// Gets the number of bits needed for encoding given |entropy_data|.
static int64_t GetNumberOfDataBits(const EntropyData &entropy_data);
// Gets the number of bits needed for encoding frequency table using the rans
// encoder for the given |entropy_data|.
static int64_t GetNumberOfRAnsTableBits(const EntropyData &entropy_data);
private:
EntropyData UpdateSymbols(const uint32_t *symbols, int num_symbols,
bool push_changes);
std::vector<int32_t> frequencies_;
EntropyData entropy_data_;
};
} // namespace draco
#endif // DRACO_COMPRESSION_ENTROPY_SHANNON_ENTROPY_H_
|