11 #define MAX_SYMBOLS (1<<16) 19 return numbits / 8 + (numbits % 8 ? 1 : 0);
37 TreeNode(uint16_t sym,
size_t cnt=0) : parent(0), isLeaf(true)
45 count = n0 && n1 ? n0->
count + n1->count : 0;
46 zero = n0 && n1 ? (n0->count > n1->count ? n0 : n1) : NULL;
47 one = n0 && n1 ? (n0->count > n1->count ? n1 : n0) : NULL;
92 if (nbits>
sizeof(
size_t)*8)
93 throw std::runtime_error(
"Too many different symbols - this should not happen!");
95 if (nbits>
sizeof(
size_t)*8)
108 CreateEncoder(n->
zero, bits, nbits+1) &&
109 CreateEncoder(n->
one, bits | (1<<nbits), nbits+1);
115 out.append((
char*)&count,
sizeof(
size_t));
119 const Code &n = lut[
i];
124 out.append((
char*)&
i,
sizeof(uint16_t));
129 out.append((
char*)&n.
numbits,
sizeof(uint8_t));
133 out.append((
char*)&n.
bits, numbytes);
137 void Encode(std::string &out,
const uint16_t *bufin,
size_t bufinlen)
const 145 for (uint32_t
i=0;
i<bufinlen; ++
i)
147 const uint16_t &
symbol = bufin[
i];
152 const uint8_t *bits = (uint8_t*)&code->
bits;
157 const uint8_t free_bits = 8 - curbit;
160 curbyte |= *bits<<curbit;
165 if (nbits>=free_bits)
168 curbyte = *bits>>free_bits;
175 const uint8_t consumed = nbits>8 ? 8 : nbits;
187 Encoder(
const uint16_t *bufin,
size_t bufinlen) : count(0)
193 for (
const uint16_t *p=bufin; p<bufin+bufinlen; p++)
197 std::multiset<TreeNode*, TreeNode>
set;
200 set.insert(
new TreeNode(i, counts[i]));
205 auto it =
set.begin();
212 set.erase(it1, ++it2);
218 const TreeNode *root = *
set.begin();
248 void Set(uint16_t sym, uint8_t n=0,
size_t bits=0)
255 lut[bits&0xff].
Set(sym, n-8, bits>>8);
259 const int nn = 1<<(8-n);
261 for (
int i=0;
i<nn;
i++)
263 const uint8_t key = bits | (
i<<n);
279 Build(*p.
zero, bits, n+1);
280 Build(*p.
one, bits | (1<<n), n+1);
288 const uint8_t *
Decode(
const uint8_t *in_ptr,
const uint8_t *in_end,
289 uint16_t *out_ptr,
const uint16_t *out_end)
const 295 while (out_ptr < out_end)
301 while (in_ptr<in_end && out_ptr<out_end)
303 const uint16_t *two = (uint16_t*)in_ptr;
305 const uint8_t curbyte = (*two >> curbit);
309 throw std::runtime_error(
"Unknown bitcode in stream!");
315 p = p->
lut + curbyte;
335 return curbit ? in_ptr+1 : in_ptr;
338 Decoder(
const uint8_t* bufin, int64_t &pindex) : isLeaf(false), lut(NULL)
344 memcpy(&count, bufin + pindex,
sizeof(count));
345 pindex +=
sizeof(
count);
351 memcpy(&sym, bufin + pindex,
sizeof(uint16_t));
352 pindex +=
sizeof(uint16_t);
361 memcpy(&numbits, bufin + pindex,
sizeof(uint8_t));
362 pindex +=
sizeof(uint8_t);
367 if (numbytes>
sizeof(
size_t))
368 throw std::runtime_error(
"Number of bytes for a single symbol exceeds maximum.");
370 if (numbytes>
sizeof(
size_t))
377 memcpy(&bits, bufin+pindex, numbytes);
380 Set(sym, numbits, bits);
385 inline bool Encode(std::string &bufout,
const uint16_t *bufin,
size_t bufinlen)
387 const Encoder encoder(bufin, bufinlen);
390 if (encoder.
count==0)
394 bufout.append((
char*)&bufinlen,
sizeof(
size_t));
396 encoder.
Encode(bufout, bufin, bufinlen);
401 inline int64_t
Decode(
const uint8_t *bufin,
403 std::vector<uint16_t> &pbufout)
408 size_t data_count = 0;
409 memcpy(&data_count, bufin,
sizeof(
size_t));
413 pbufout.resize(data_count);
415 const Decoder decoder(bufin, i);
422 const uint8_t *in_ptr =
423 decoder.
Decode(bufin+i, bufin+bufinlen,
424 pbufout.data(), pbufout.data()+data_count);
Encoder(const uint16_t *bufin, size_t bufinlen)
void Encode(std::string &out, const uint16_t *bufin, size_t bufinlen) const
TreeNode(TreeNode *n0=0, TreeNode *n1=0)
TreeNode(uint16_t sym, size_t cnt=0)
int64_t Decode(const uint8_t *bufin, size_t bufinlen, std::vector< uint16_t > &pbufout)
void WriteCodeTable(std::string &out) const
const uint8_t * Decode(const uint8_t *in_ptr, const uint8_t *in_end, uint16_t *out_ptr, const uint16_t *out_end) const
bool Encode(std::string &bufout, const uint16_t *bufin, size_t bufinlen)
static unsigned long numbytes_from_numbits(unsigned long numbits)
bool CreateEncoder(const TreeNode *n, size_t bits=0, uint8_t nbits=0)
void Set(uint16_t sym, uint8_t n=0, size_t bits=0)
Decoder(const TreeNode &p)
void Build(const TreeNode &p, uint64_t bits=0, uint8_t n=0)
bool operator()(const TreeNode *hn1, const TreeNode *hn2) const
Decoder(const uint8_t *bufin, int64_t &pindex)