ALib C++ Framework
by
Library Version: 2511 R0
Documentation generated by doxygen
Loading...
Searching...
No Matches
huffman.cpp
Go to the documentation of this file.
1//##################################################################################################
2// ALib C++ Framework
3//
4// Copyright 2013-2025 A-Worx GmbH, Germany
5// Published under 'Boost Software License' (a free software license, see LICENSE.txt)
6//##################################################################################################
7/// \file
8#include "alib_precompile.hpp"
9#if !defined(ALIB_C20_MODULES) || ((ALIB_C20_MODULES != 0) && (ALIB_C20_MODULES != 1))
10# error "Configuration MACRO ALIB_C20_MODULES has to be given to the compiler as either 0 or 1"
11#endif
12#if ALIB_C20_MODULES
13 module;
14#endif
15//========================================= Global Fragment ========================================
17
18//============================================== Module ============================================
19#if ALIB_C20_MODULES
20 module ALib.BitBuffer;
21 import ALib.Containers.FixedCapacityVector;
22#else
24# include "ALib.BitBuffer.H"
25#endif
26
27namespace alib { namespace bitbuffer { namespace ac_v1 {
28
29#if !DOXYGEN
30//#define TEMP_PT(...) __VA_ARGS__
31# define TEMP_PT(...)
32#endif
33
34namespace
35{
36/// Internal struct representing nodes of the huffman code tree.
37class Node
38{
39 /// Shortcut to the symbol-struct.
41
42 protected:
43 /// Either a pointer to a left subtree or to a symbol.
44 /// The latter makes the node a leaf.
46 {
47 Symbol* symbol; ///< The symbol represented by this node (if not \c nullptr).
48 Node* left; ///< The left subtree.
49
50 LeftPointer() = default; ///< Non-initializing default constructor.
51
52 /// Constructs this union with a symbol pointer.
53 /// @param s The symbol to set.
55 : symbol( s ) {}
56
57 /// Constructs this union with a cursor.
58 /// @param l The node to set.
60 : left ( l ) {}
61 };
62
63 LeftPointer left; ///< If \b right is set, then this is a pointer to the left subtree,
64 ///< otherwise a pointer to a symbol.
65 Node* right; ///< The right subtree.
66
67
68 public:
69 size_t frequency; ///< The frequency of the symbol or the sum of the two subtrees.
70
71 /// Default constructor used when defining arrays of nodes on stack memory.
72 Node() =default;
73
74 /// Constructs a node representing a symbol (leaf).
75 /// Left and right pointers are set to nullptr
76 /// @param s Pointer to the symbol in #"symbols", that this node represents.
78 : left (s)
79 , right (nullptr)
80 , frequency (s->frequency) {}
81
82 /// Constructs a node as an internal non-leaf node.
83 /// Left and right pointers are set to as given.
84 /// @param l Pointer to the left node.
85 /// @param r Pointer to the right node.
86 Node(Node* l, Node* r)
87 : left (l)
88 , right (r)
89 , frequency (l->frequency + r->frequency) {}
90
91 /// Determines if this is a leaf node.
92 /// @returns \c true if this is a leaf node.
93 bool isLeaf() const { return right == nullptr; }
94
95 /// Must be called only for leaf nodes.
96 /// @returns The pointer to the symbol.
97 Symbol* getSymbol() const { return left.symbol; }
98
99 /// Must be called only for non-leaf nodes.
100 /// @returns The left node.
101 Node* getLeft() const { return left.left; }
102
103 /// @returns The right node.
104 Node* getRight() const { return right; }
105}; //struct Node
106} // anonymous namespace
107
108
110 constexpr int maxNodesNeeded= 256 + 255;
111 Node nodePool[maxNodesNeeded];
112
113 // buildTree
114 Node* tree;
115 {
116 struct cmpHN { bool operator()(Node* l, Node* r) { return (l->frequency > r->frequency); } };
117
118 // create a list of nodes, sorted by frequency (least in front)
119 // Note: for this, we first create fixed array on the stack, where we fill the
120 // nodes unsorted. Then we move this into the priority queue, what sorts them
121 // once. (unfortunately the move copies the array, but still it is faster than
122 // if the heap was sorted with each insert)
123 int npNext= 0;
124 FixedCapacityVector<Node*, 256> underlyingNodeVector;
125 for (std::size_t i = 0; i < 256 ; ++i)
126 if( (symbols + i)->frequency > 0 )
127 underlyingNodeVector.push_back( new (nodePool + npNext++) Node(symbols + i) );
128 FixedSizePriorityQueue<Node*, 256, cmpHN> sortedNodes(cmpHN(), std::move(underlyingNodeVector));
129
130ALIB_DBG( dbgAllValuesAreSame= (sortedNodes.size() == 1); )
131 // Merge two front nodes into one, until one node remains
132 while (sortedNodes.size() > 1) {
133 // Extract two least freq nodes
134 Node* left = sortedNodes.top(); sortedNodes.pop();
135 Node* right= sortedNodes.top(); sortedNodes.pop();
136
137 // New internal node with frequency equal to the sum of the two nodes frequencies.
138 // The two extracted nodes become left and right children
139 sortedNodes.push(new ( nodePool + npNext++ ) Node(left, right) );
140 }
141
142 tree= sortedNodes.top();
143 ALIB_ASSERT_ERROR( npNext <= maxNodesNeeded , "BITBUFFER/AC/HFMN", "This can never happen" )
144 }
145
146 // generate codes and write tree information to bit buffer
147 {
148 struct Stack
149 {
150 Node* node;
151 int walked;
152 };
153
154 int depth = 0;
155 Stack stack[MAX_CODE_LENGTH];
156 stack[0] = Stack{ tree, 0 };
157 uint32_t words[MAX_WORDS] = {};
158
159TEMP_PT(Log_Warning("------ Huffman Encoding Table ----------") )
160
161 // 'recursion loop'
162 while(depth>=0) {
163 auto* node= stack[depth].node;
164
165 auto wordNo= depth / WORD_SIZE;
166 auto bitNo = depth % WORD_SIZE;
167
168 // leaf?
169 if( node->isLeaf() ) {
170 // write '1' for leaf and symbol value
171 bw.WriteBits<9>( 1 | static_cast<unsigned>(node->getSymbol() - symbols) << 1 );
172
173 // store word length and words to symbol's data
174 node->getSymbol()->wordLength= depth;
175 for( int i= 0 ; i <= wordNo ; ++i )
176 node->getSymbol()->words[i]= words[i];
177
178TEMP_PT( NString512 bits; bits << Bin(node->symbol->words[0], node->symbol->wordLength);
179 bits.Reverse();
180 Lox_Warning("HM I: {:3}: {:<15} (len={!ATAB:2}, freq={:>5})",
181 (node->symbol - symbols),
182 bits,
183 node->symbol->wordLength,
184 node->symbol->frequency ) )
185
186 // clear last bit and step up
187 words[wordNo]&= ~(uint32_t(1) << ( bitNo ) );
188 --depth;
189 continue;
190 }
191
192 // write '0' for not being a leave
193 bw.WriteBits<1>( 0u );
194
195 // process left child
196 if( stack[depth].walked == 0) {
197 stack[depth].walked++;
198 stack[depth+1]= Stack{ node->getLeft() , 0};
199 depth++;
200 continue;
201 }
202
203 // process right child
204 if( stack[depth].walked == 1) {
205 stack[depth].walked++;
206 words[wordNo] |= (1 << ( bitNo ) );
207 stack[depth+1]= Stack{ node->getRight() , 0};
208 depth++;
209 continue;
210 }
211
212 // clear last bit and step up
213 words[wordNo]&= ~(uint32_t(1) << ( bitNo ) );
214 --depth;
215 }
216TEMP_PT( Log_Warning("------End of Huffman Encoding Table ----------") )
217} }
218
220TEMP_PT( Log_Warning("------ Huffman Decoding Table ----------") )
221TEMP_PT( String512 dbgWord; )
222 struct Stack
223 {
224 Node* node;
225 int read; // 0: none, 1: left, 2: both
226 };
227
228 int depth;
230 stack[depth= 0]= Stack{ &tree, 0 };
231 while(depth>=0) {
232 auto* node= stack[depth].node;
233 // leaf?
234 if( br.ReadBits<1>() ) {
235 node->symbol= uint8_t(br.ReadBits<8>());
236TEMP_PT( Log_Warning("HM D: {:3}: {:5} (len={})",
237 (node->symbol),
238 dbgWord,
239 dbgWord.Length() ) )
240
241 --depth;
242TEMP_PT( dbgWord.DeleteEnd(1); )
243 continue;
244 }
245
246 // left 'recursion'
247 if( stack[depth].read == 0) {
248 ++stack[depth].read;
249 node->left= new ( nodePool + npNext++ ) Node();
250TEMP_PT( dbgWord << '0'; )
251 stack[depth+1]= Stack{ node->left , 0 };
252 ++depth;
253 continue;
254 }
255
256 // right 'recursion'
257 if( stack[depth].read == 1) {
258 ++stack[depth].read;
259TEMP_PT( dbgWord << '1'; )
260 node->right= new ( nodePool + npNext++ ) Node();
261 stack[depth+1]= Stack{ node->right , 0 };
262 ++depth;
263 continue;
264 }
265
266TEMP_PT( dbgWord.DeleteEnd(1); )
267 --depth;
268 }
269
270ALIB_ASSERT_ERROR( npNext <= MAX_NODES, "BITBUFFER/AC/HFMN", "This can never happen" )
271
272TEMP_PT( Log_Warning("------End of Huffman Decoding Table ----------") )
273}
274
275
276
277
278#undef TEMP_PT
279
280}}} // namespace [alib::bitbuffer::ac_v1]
#define ALIB_DBG(...)
Definition alib.inl:931
#define ALIB_ASSERT_ERROR(cond, domain,...)
Definition alib.inl:1144
#define Log_Warning(...)
#define Lox_Warning(...)
int npNext
The next node in nodePool to use.
Definition huffman.inl:131
static constexpr int MAX_NODES
The maximum number of nodes in the tree.
Definition huffman.inl:126
Node tree
The root node of the symbol tree.
Definition huffman.inl:129
Node nodePool[MAX_NODES]
Pre-allocated node objects.
Definition huffman.inl:130
BitReader & br
The bit reader given in the constructor.
Definition huffman.inl:128
BitWriter & bw
The bit writer to use for encoding the data.
Definition huffman.inl:49
static constexpr int MAX_CODE_LENGTH
Definition huffman.inl:35
Symbol symbols[256]
The symbol table.
Definition huffman.inl:50
void Generate()
Generates the huffman encoding table and writes this information to the bit writer.
Definition huffman.cpp:109
HuffmanEncoder::Symbol Symbol
Shortcut to the symbol-struct.
Definition huffman.cpp:40
size_t frequency
The frequency of the symbol or the sum of the two subtrees.
Definition huffman.cpp:69
Node()=default
Default constructor used when defining arrays of nodes on stack memory.
std::priority_queue< T, FixedCapacityVector< T, TSize >, TCompare > FixedSizePriorityQueue
strings::TBin< character > Bin
Type alias in namespace alib.
Definition format.inl:568
containers::FixedCapacityVector< T, TSize > FixedCapacityVector
Type alias in namespace alib.
NLocalString< 512 > NString512
Type alias name for #"TLocalString;TLocalString<nchar,512>".
LocalString< 512 > String512
Type alias name for #"TLocalString;TLocalString<character,512>".
Internal struct representing nodes of the huffman code tree.
Definition huffman.inl:114
Symbol * symbol
The symbol represented by this node (if not nullptr).
Definition huffman.cpp:47