1 |
/* xdelta 3 - delta compression tools and library |
2 |
* Copyright (C) 2002, 2006, 2007. Joshua P. MacDonald |
3 |
* |
4 |
* This program is free software; you can redistribute it and/or modify |
5 |
* it under the terms of the GNU General Public License as published by |
6 |
* the Free Software Foundation; either version 2 of the License, or |
7 |
* (at your option) any later version. |
8 |
* |
9 |
* This program is distributed in the hope that it will be useful, |
10 |
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
11 |
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
12 |
* GNU General Public License for more details. |
13 |
* |
14 |
* You should have received a copy of the GNU General Public License |
15 |
* along with this program; if not, write to the Free Software |
16 |
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
17 |
*/ |
18 |
|
19 |
#ifndef _XDELTA3_DJW_H_ |
20 |
#define _XDELTA3_DJW_H_ |
21 |
|
22 |
/* The following people deserve much credit for the algorithms and techniques contained in |
23 |
* this file: |
24 |
|
25 |
Julian Seward |
26 |
Bzip2 sources, implementation of the multi-table Huffman technique. |
27 |
|
28 |
Jean-loup Gailly and Mark Adler and L. Peter Deutsch |
29 |
Zlib source code, RFC 1951 |
30 |
|
31 |
Daniel S. Hirschberg and Debra A. LeLewer |
32 |
"Efficient Decoding of Prefix Codes" |
33 |
Communications of the ACM, April 1990 33(4). |
34 |
|
35 |
David J. Wheeler |
36 |
Program bred3.c, bexp3 and accompanying documents bred3.ps, huff.ps. |
37 |
This contains the idea behind the multi-table Huffman and 1-2 coding techniques. |
38 |
ftp://ftp.cl.cam.ac.uk/users/djw3/ |
39 |
|
40 |
*/ |
41 |
|
42 |
/* OPT: during the multi-table iteration, pick the worst-overall performing table and |
43 |
* replace it with exactly the frequencies of the worst-overall performing sector or |
44 |
* N-worst performing sectors. */ |
45 |
|
46 |
/* REF: See xdfs-0.222 and xdfs-0.226 for some old experiments with the Bzip prefix coding |
47 |
* strategy. xdfs-0.256 contains the last of the other-format tests, including RFC1950 |
48 |
* and the RFC1950+MTF tests. */ |
49 |
|
50 |
#define DJW_MAX_CODELEN 32 /* Maximum length of an alphabet code. */ |
51 |
|
52 |
#define DJW_TOTAL_CODES (DJW_MAX_CODELEN+2) /* [RUN_0, RUN_1, 1-DJW_MAX_CODELEN] */ |
53 |
|
54 |
#define RUN_0 0 /* Symbols used in MTF+1/2 coding. */ |
55 |
#define RUN_1 1 |
56 |
|
57 |
#define DJW_BASIC_CODES 5 /* Number of code lengths always encoded (djw_encode_basic array) */ |
58 |
#define DJW_RUN_CODES 2 /* Number of run codes */ |
59 |
#define DJW_EXTRA_12OFFSET 7 /* Offset of extra codes */ |
60 |
#define DJW_EXTRA_CODES 15 /* Number of optionally encoded code lengths (djw_encode_extra array) */ |
61 |
#define DJW_EXTRA_CODE_BITS 4 /* Number of bits to code [0-DJW_EXTRA_CODES] */ |
62 |
|
63 |
#define DJW_MAX_GROUPS 8 /* Max number of group coding tables */ |
64 |
#define DJW_GROUP_BITS 3 /* Number of bits to code [1-DJW_MAX_GROUPS] */ |
65 |
|
66 |
#define DJW_SECTORSZ_MULT 5 /* Multiplier for encoded sectorsz */ |
67 |
#define DJW_SECTORSZ_BITS 5 /* Number of bits to code group size */ |
68 |
#define DJW_SECTORSZ_MAX ((1 << DJW_SECTORSZ_BITS) * DJW_SECTORSZ_MULT) |
69 |
|
70 |
#define DJW_MAX_ITER 6 /* Maximum number of iterations to find group tables. */ |
71 |
#define DJW_MIN_IMPROVEMENT 20 /* Minimum number of bits an iteration must reduce coding by. */ |
72 |
|
73 |
#define DJW_MAX_CLCLEN 15 /* Maximum code length of a prefix code length */ |
74 |
#define DJW_CLCLEN_BITS 4 /* Number of bits to code [0-DJW_MAX_CLCLEN] */ |
75 |
|
76 |
#define DJW_MAX_GBCLEN 7 /* Maximum code length of a group selector */ |
77 |
#define DJW_GBCLEN_BITS 3 /* Number of bits to code [0-DJW_MAX_GBCLEN] |
78 |
* @!@ Actually, should never have zero code lengths here, or |
79 |
* else a group went unused. Write a test for this: if a group |
80 |
* goes unused, eliminate it? */ |
81 |
|
82 |
#define EFFICIENCY_BITS 16 /* It has to save at least this many bits... */ |
83 |
|
84 |
typedef struct _djw_stream djw_stream; |
85 |
typedef struct _djw_heapen djw_heapen; |
86 |
typedef struct _djw_prefix djw_prefix; |
87 |
typedef uint32_t djw_weight; |
88 |
|
89 |
/* To enable Huffman tuning code... */ |
90 |
#ifndef TUNE_HUFFMAN |
91 |
#define TUNE_HUFFMAN 0 |
92 |
#endif |
93 |
|
94 |
#if TUNE_HUFFMAN == 0 |
95 |
#define xd3_real_encode_huff xd3_encode_huff |
96 |
#define IF_TUNE(x) |
97 |
#define IF_NTUNE(x) x |
98 |
#else |
99 |
static uint xd3_bitsof_output (xd3_output *output, bit_state *bstate); |
100 |
#define IF_TUNE(x) x |
101 |
#define IF_NTUNE(x) |
102 |
static djw_weight tune_freq[DJW_TOTAL_CODES]; |
103 |
static uint8_t tune_clen[DJW_MAX_GROUPS][ALPHABET_SIZE]; |
104 |
static usize_t tune_prefix_bits; |
105 |
static usize_t tune_select_bits; |
106 |
static usize_t tune_encode_bits; |
107 |
#endif |
108 |
struct _djw_heapen |
109 |
{ |
110 |
uint32_t depth; |
111 |
uint32_t freq; |
112 |
uint32_t parent; |
113 |
}; |
114 |
|
115 |
struct _djw_prefix |
116 |
{ |
117 |
usize_t scount; |
118 |
uint8_t *symbol; |
119 |
usize_t mcount; |
120 |
uint8_t *mtfsym; |
121 |
uint8_t *repcnt; |
122 |
}; |
123 |
|
124 |
struct _djw_stream |
125 |
{ |
126 |
int unused; |
127 |
}; |
128 |
|
129 |
/* Each Huffman table consists of 256 "code length" (CLEN) codes, which are themselves |
130 |
* Huffman coded after eliminating repeats and move-to-front coding. The prefix consists |
131 |
* of all the CLEN codes in djw_encode_basic plus a 4-bit value stating how many of the |
132 |
* djw_encode_extra codes are actually coded (the rest are presumed zero, or unused CLEN |
133 |
* codes). |
134 |
* |
135 |
* These values of these two arrays were arrived at by studying the distribution of min |
136 |
* and max clen over a collection of DATA, INST, and ADDR inputs. The goal is to specify |
137 |
* the order of djw_extra_codes that is most likely to minimize the number of extra codes |
138 |
* that must be encoded. |
139 |
* |
140 |
* Results: 158896 sections were counted by compressing files (window size 512K) listed |
141 |
* with: `find / -type f ( -user jmacd -o -perm +444 )` |
142 |
* |
143 |
* The distribution of CLEN codes for each efficient invocation of the secondary |
144 |
* compressor (taking the best number of groups/sector size) was recorded. Then we look at |
145 |
* the distribution of min and max clen values, counting the number of times the value |
146 |
* C_low is less than the min and C_high is greater than the max. Values >= C_high and <= |
147 |
* C_low will not have their lengths coded. The results are sorted and the least likely |
148 |
* 15 are placed into the djw_encode_extra[] array in order. These values are used as |
149 |
* the initial MTF ordering. |
150 |
|
151 |
clow[1] = 155119 |
152 |
clow[2] = 140325 |
153 |
clow[3] = 84072 |
154 |
--- |
155 |
clow[4] = 7225 |
156 |
clow[5] = 1093 |
157 |
clow[6] = 215 |
158 |
--- |
159 |
chigh[4] = 1 |
160 |
chigh[5] = 30 |
161 |
chigh[6] = 218 |
162 |
chigh[7] = 2060 |
163 |
chigh[8] = 13271 |
164 |
--- |
165 |
chigh[9] = 39463 |
166 |
chigh[10] = 77360 |
167 |
chigh[11] = 118298 |
168 |
chigh[12] = 141360 |
169 |
chigh[13] = 154086 |
170 |
chigh[14] = 157967 |
171 |
chigh[15] = 158603 |
172 |
chigh[16] = 158864 |
173 |
chigh[17] = 158893 |
174 |
chigh[18] = 158895 |
175 |
chigh[19] = 158896 |
176 |
chigh[20] = 158896 |
177 |
|
178 |
*/ |
179 |
|
180 |
static const uint8_t djw_encode_12extra[DJW_EXTRA_CODES] = |
181 |
{ |
182 |
9, 10, 3, 11, 2, 12, 13, 1, 14, 15, 16, 17, 18, 19, 20 |
183 |
}; |
184 |
|
185 |
static const uint8_t djw_encode_12basic[DJW_BASIC_CODES] = |
186 |
{ |
187 |
4, 5, 6, 7, 8, |
188 |
}; |
189 |
|
190 |
/*********************************************************************/ |
191 |
/* DECLS */ |
192 |
/*********************************************************************/ |
193 |
|
194 |
static djw_stream* djw_alloc (xd3_stream *stream /*, int alphabet_size */); |
195 |
static void djw_init (djw_stream *h); |
196 |
static void djw_destroy (xd3_stream *stream, |
197 |
djw_stream *h); |
198 |
|
199 |
#if XD3_ENCODER |
200 |
static int xd3_encode_huff (xd3_stream *stream, |
201 |
djw_stream *sec_stream, |
202 |
xd3_output *input, |
203 |
xd3_output *output, |
204 |
xd3_sec_cfg *cfg); |
205 |
#endif |
206 |
|
207 |
static int xd3_decode_huff (xd3_stream *stream, |
208 |
djw_stream *sec_stream, |
209 |
const uint8_t **input, |
210 |
const uint8_t *const input_end, |
211 |
uint8_t **output, |
212 |
const uint8_t *const output_end); |
213 |
|
214 |
/*********************************************************************/ |
215 |
/* HUFFMAN */ |
216 |
/*********************************************************************/ |
217 |
|
218 |
static djw_stream* |
219 |
djw_alloc (xd3_stream *stream) |
220 |
{ |
221 |
return xd3_alloc (stream, sizeof (djw_stream), 1); |
222 |
} |
223 |
|
224 |
static void |
225 |
djw_init (djw_stream *h) |
226 |
{ |
227 |
/* Fields are initialized prior to use. */ |
228 |
} |
229 |
|
230 |
static void |
231 |
djw_destroy (xd3_stream *stream, |
232 |
djw_stream *h) |
233 |
{ |
234 |
xd3_free (stream, h); |
235 |
} |
236 |
|
237 |
|
238 |
/*********************************************************************/ |
239 |
/* HEAP */ |
240 |
/*********************************************************************/ |
241 |
|
242 |
static INLINE int |
243 |
heap_less (const djw_heapen *a, const djw_heapen *b) |
244 |
{ |
245 |
return a->freq < b->freq || |
246 |
(a->freq == b->freq && |
247 |
a->depth < b->depth); |
248 |
} |
249 |
|
250 |
static INLINE void |
251 |
heap_insert (uint *heap, const djw_heapen *ents, uint p, const uint e) |
252 |
{ |
253 |
/* Insert ents[e] into next slot heap[p] */ |
254 |
uint pp = p/2; /* P's parent */ |
255 |
|
256 |
while (heap_less (& ents[e], & ents[heap[pp]])) |
257 |
{ |
258 |
heap[p] = heap[pp]; |
259 |
p = pp; |
260 |
pp = p/2; |
261 |
} |
262 |
|
263 |
heap[p] = e; |
264 |
} |
265 |
|
266 |
static INLINE djw_heapen* |
267 |
heap_extract (uint *heap, const djw_heapen *ents, uint heap_last) |
268 |
{ |
269 |
uint smallest = heap[1]; |
270 |
uint p, pc, t; |
271 |
|
272 |
/* Caller decrements heap_last, so heap_last+1 is the replacement elt. */ |
273 |
heap[1] = heap[heap_last+1]; |
274 |
|
275 |
/* Re-heapify */ |
276 |
for (p = 1; ; p = pc) |
277 |
{ |
278 |
pc = p*2; |
279 |
|
280 |
/* Reached bottom of heap */ |
281 |
if (pc > heap_last) { break; } |
282 |
|
283 |
/* See if second child is smaller. */ |
284 |
if (pc < heap_last && heap_less (& ents[heap[pc+1]], & ents[heap[pc]])) { pc += 1; } |
285 |
|
286 |
/* If pc is not smaller than p, heap property re-established. */ |
287 |
if (! heap_less (& ents[heap[pc]], & ents[heap[p]])) { break; } |
288 |
|
289 |
t = heap[pc]; |
290 |
heap[pc] = heap[p]; |
291 |
heap[p] = t; |
292 |
} |
293 |
|
294 |
return (djw_heapen*) & ents[smallest]; |
295 |
} |
296 |
|
297 |
#if XD3_DEBUG |
298 |
static void |
299 |
heap_check (uint *heap, djw_heapen *ents, uint heap_last) |
300 |
{ |
301 |
uint i; |
302 |
for (i = 1; i <= heap_last; i += 1) |
303 |
{ |
304 |
/* Heap property: child not less than parent */ |
305 |
XD3_ASSERT (! heap_less (& ents[heap[i]], & ents[heap[i/2]])); |
306 |
} |
307 |
} |
308 |
#endif |
309 |
|
310 |
/*********************************************************************/ |
311 |
/* MTF, 1/2 */ |
312 |
/*********************************************************************/ |
313 |
|
314 |
static INLINE usize_t |
315 |
djw_update_mtf (uint8_t *mtf, usize_t mtf_i) |
316 |
{ |
317 |
int k; |
318 |
usize_t sym = mtf[mtf_i]; |
319 |
|
320 |
for (k = mtf_i; k != 0; k -= 1) { mtf[k] = mtf[k-1]; } |
321 |
|
322 |
mtf[0] = sym; |
323 |
return sym; |
324 |
} |
325 |
|
326 |
static INLINE void |
327 |
djw_update_1_2 (int *mtf_run, usize_t *mtf_i, uint8_t *mtfsym, djw_weight *freq) |
328 |
{ |
329 |
int code; |
330 |
|
331 |
do |
332 |
{ |
333 |
/* Offset by 1, since any number of RUN_ symbols implies run>0... */ |
334 |
*mtf_run -= 1; |
335 |
|
336 |
code = (*mtf_run & 1) ? RUN_1 : RUN_0; |
337 |
|
338 |
mtfsym[(*mtf_i)++] = code; |
339 |
freq[code] += 1; |
340 |
*mtf_run >>= 1; |
341 |
} |
342 |
while (*mtf_run >= 1); |
343 |
|
344 |
*mtf_run = 0; |
345 |
} |
346 |
|
347 |
static void |
348 |
djw_init_clen_mtf_1_2 (uint8_t *clmtf) |
349 |
{ |
350 |
int i, cl_i = 0; |
351 |
|
352 |
clmtf[cl_i++] = 0; |
353 |
for (i = 0; i < DJW_BASIC_CODES; i += 1) { clmtf[cl_i++] = djw_encode_12basic[i]; } |
354 |
for (i = 0; i < DJW_EXTRA_CODES; i += 1) { clmtf[cl_i++] = djw_encode_12extra[i]; } |
355 |
} |
356 |
|
357 |
/*********************************************************************/ |
358 |
/* PREFIX CODES */ |
359 |
/*********************************************************************/ |
360 |
#if XD3_ENCODER |
361 |
static usize_t |
362 |
djw_build_prefix (const djw_weight *freq, uint8_t *clen, int asize, int maxlen) |
363 |
{ |
364 |
/* Heap with 0th entry unused, prefix tree with up to ALPHABET_SIZE-1 internal nodes, |
365 |
* never more than ALPHABET_SIZE entries actually in the heap (minimum weight subtrees |
366 |
* during prefix construction). First ALPHABET_SIZE entries are the actual symbols, |
367 |
* next ALPHABET_SIZE-1 are internal nodes. */ |
368 |
djw_heapen ents[ALPHABET_SIZE * 2]; |
369 |
uint heap[ALPHABET_SIZE + 1]; |
370 |
|
371 |
uint heap_last; /* Index of the last _valid_ heap entry. */ |
372 |
uint ents_size; /* Number of entries, including 0th fake entry */ |
373 |
int overflow; /* Number of code lengths that overflow */ |
374 |
uint32_t total_bits; |
375 |
int i; |
376 |
|
377 |
IF_DEBUG (uint32_t first_bits = 0); |
378 |
|
379 |
/* Insert real symbol frequences. */ |
380 |
for (i = 0; i < asize; i += 1) |
381 |
{ |
382 |
ents[i+1].freq = freq[i]; |
383 |
} |
384 |
|
385 |
again: |
386 |
|
387 |
/* The loop is re-entered each time an overflow occurs. Re-initialize... */ |
388 |
heap_last = 0; |
389 |
ents_size = 1; |
390 |
overflow = 0; |
391 |
total_bits = 0; |
392 |
|
393 |
/* 0th entry terminates the while loop in heap_insert (its the parent of the smallest |
394 |
* element, always less-than) */ |
395 |
heap[0] = 0; |
396 |
ents[0].depth = 0; |
397 |
ents[0].freq = 0; |
398 |
|
399 |
/* Initial heap. */ |
400 |
for (i = 0; i < asize; i += 1, ents_size += 1) |
401 |
{ |
402 |
ents[ents_size].depth = 0; |
403 |
ents[ents_size].parent = 0; |
404 |
|
405 |
if (ents[ents_size].freq != 0) |
406 |
{ |
407 |
heap_insert (heap, ents, ++heap_last, ents_size); |
408 |
} |
409 |
} |
410 |
|
411 |
IF_DEBUG (heap_check (heap, ents, heap_last)); |
412 |
|
413 |
/* Must be at least one symbol, or else we can't get here. */ |
414 |
XD3_ASSERT (heap_last != 0); |
415 |
|
416 |
/* If there is only one symbol, fake a second to prevent zero-length codes. */ |
417 |
if (unlikely (heap_last == 1)) |
418 |
{ |
419 |
/* Pick either the first or last symbol. */ |
420 |
int s = freq[0] ? asize-1 : 0; |
421 |
ents[s+1].freq = 1; |
422 |
goto again; |
423 |
} |
424 |
|
425 |
/* Build prefix tree. */ |
426 |
while (heap_last > 1) |
427 |
{ |
428 |
djw_heapen *h1 = heap_extract (heap, ents, --heap_last); |
429 |
djw_heapen *h2 = heap_extract (heap, ents, --heap_last); |
430 |
|
431 |
ents[ents_size].freq = h1->freq + h2->freq; |
432 |
ents[ents_size].depth = 1 + max (h1->depth, h2->depth); |
433 |
ents[ents_size].parent = 0; |
434 |
|
435 |
h1->parent = h2->parent = ents_size; |
436 |
|
437 |
heap_insert (heap, ents, ++heap_last, ents_size++); |
438 |
|
439 |
IF_DEBUG (heap_check (heap, ents, heap_last)); |
440 |
} |
441 |
|
442 |
/* Now compute prefix code lengths, counting parents. */ |
443 |
for (i = 1; i < asize+1; i += 1) |
444 |
{ |
445 |
int b = 0; |
446 |
|
447 |
if (ents[i].freq != 0) |
448 |
{ |
449 |
int p = i; |
450 |
|
451 |
while ((p = ents[p].parent) != 0) { b += 1; } |
452 |
|
453 |
if (b > maxlen) { overflow = 1; } |
454 |
|
455 |
total_bits += b * freq[i-1]; |
456 |
} |
457 |
|
458 |
/* clen is 0-origin, unlike ents. */ |
459 |
clen[i-1] = b; |
460 |
} |
461 |
|
462 |
IF_DEBUG (if (first_bits == 0) first_bits = total_bits); |
463 |
|
464 |
if (! overflow) |
465 |
{ |
466 |
IF_DEBUG (if (first_bits != total_bits) |
467 |
{ |
468 |
DP(RINT "code length overflow changed %u bits\n", (usize_t)(total_bits - first_bits)); |
469 |
}); |
470 |
return total_bits; |
471 |
} |
472 |
|
473 |
/* OPT: There is a non-looping way to fix overflow shown in zlib, but this is easier |
474 |
* (for now), as done in bzip2. */ |
475 |
for (i = 1; i < asize+1; i += 1) |
476 |
{ |
477 |
ents[i].freq = ents[i].freq / 2 + 1; |
478 |
} |
479 |
|
480 |
goto again; |
481 |
} |
482 |
|
483 |
static void |
484 |
djw_build_codes (uint *codes, const uint8_t *clen, int asize DEBUG_ARG (int abs_max)) |
485 |
{ |
486 |
int i, l; |
487 |
int min_clen = DJW_MAX_CODELEN; |
488 |
int max_clen = 0; |
489 |
uint code = 0; |
490 |
|
491 |
for (i = 0; i < asize; i += 1) |
492 |
{ |
493 |
if (clen[i] > 0 && clen[i] < min_clen) |
494 |
{ |
495 |
min_clen = clen[i]; |
496 |
} |
497 |
|
498 |
max_clen = max (max_clen, (int) clen[i]); |
499 |
} |
500 |
|
501 |
XD3_ASSERT (max_clen <= abs_max); |
502 |
|
503 |
for (l = min_clen; l <= max_clen; l += 1) |
504 |
{ |
505 |
for (i = 0; i < asize; i += 1) |
506 |
{ |
507 |
if (clen[i] == l) { codes[i] = code++; } |
508 |
} |
509 |
|
510 |
code <<= 1; |
511 |
} |
512 |
} |
513 |
|
514 |
/*********************************************************************/ |
515 |
/* MOVE-TO-FRONT */ |
516 |
/*********************************************************************/ |
517 |
static void |
518 |
djw_compute_mtf_1_2 (djw_prefix *prefix, |
519 |
uint8_t *mtf, |
520 |
djw_weight *freq_out, /* freak out! */ |
521 |
usize_t nsym) |
522 |
{ |
523 |
int i, j, k; |
524 |
usize_t sym; |
525 |
usize_t size = prefix->scount; |
526 |
usize_t mtf_i = 0; |
527 |
int mtf_run = 0; |
528 |
|
529 |
memset (freq_out, 0, sizeof (freq_out[0]) * (nsym+1)); |
530 |
|
531 |
for (i = 0; i < size; ) |
532 |
{ |
533 |
/* OPT: Bzip optimizes this algorithm a little by effectively checking j==0 before |
534 |
* the MTF update. */ |
535 |
sym = prefix->symbol[i++]; |
536 |
|
537 |
for (j = 0; mtf[j] != sym; j += 1) { } |
538 |
|
539 |
XD3_ASSERT (j < nsym); |
540 |
|
541 |
for (k = j; k >= 1; k -= 1) { mtf[k] = mtf[k-1]; } |
542 |
|
543 |
mtf[0] = sym; |
544 |
|
545 |
if (j == 0) |
546 |
{ |
547 |
mtf_run += 1; |
548 |
continue; |
549 |
} |
550 |
|
551 |
if (mtf_run > 0) |
552 |
{ |
553 |
djw_update_1_2 (& mtf_run, & mtf_i, prefix->mtfsym, freq_out); |
554 |
} |
555 |
|
556 |
/* Non-zero symbols are offset by RUN_1 */ |
557 |
prefix->mtfsym[mtf_i++] = j+RUN_1; |
558 |
freq_out[j+RUN_1] += 1; |
559 |
} |
560 |
|
561 |
if (mtf_run > 0) |
562 |
{ |
563 |
djw_update_1_2 (& mtf_run, & mtf_i, prefix->mtfsym, freq_out); |
564 |
} |
565 |
|
566 |
prefix->mcount = mtf_i; |
567 |
} |
568 |
|
569 |
static usize_t |
570 |
djw_count_freqs (djw_weight *freq, xd3_output *input) |
571 |
{ |
572 |
xd3_output *in; |
573 |
usize_t size = 0; |
574 |
|
575 |
memset (freq, 0, sizeof (freq[0]) * ALPHABET_SIZE); |
576 |
|
577 |
/* Freqency counting. OPT: can be accomplished beforehand. */ |
578 |
for (in = input; in; in = in->next_page) |
579 |
{ |
580 |
const uint8_t *p = in->base; |
581 |
const uint8_t *p_max = p + in->next; |
582 |
|
583 |
size += in->next; |
584 |
|
585 |
do { freq[*p++] += 1; } while (p < p_max); |
586 |
} |
587 |
|
588 |
IF_DEBUG1 ({int i; |
589 |
DP(RINT "freqs: "); |
590 |
for (i = 0; i < ALPHABET_SIZE; i += 1) { DP(RINT "%u ", freq[i]); } |
591 |
DP(RINT "\n");}); |
592 |
|
593 |
return size; |
594 |
} |
595 |
|
596 |
static void |
597 |
djw_compute_multi_prefix (int groups, |
598 |
uint8_t clen[DJW_MAX_GROUPS][ALPHABET_SIZE], |
599 |
djw_prefix *prefix) |
600 |
{ |
601 |
int gp, i; |
602 |
|
603 |
prefix->scount = ALPHABET_SIZE; |
604 |
memcpy (prefix->symbol, clen[0], ALPHABET_SIZE); |
605 |
|
606 |
for (gp = 1; gp < groups; gp += 1) |
607 |
{ |
608 |
for (i = 0; i < ALPHABET_SIZE; i += 1) |
609 |
{ |
610 |
if (clen[gp][i] == 0) |
611 |
{ |
612 |
continue; |
613 |
} |
614 |
|
615 |
prefix->symbol[prefix->scount++] = clen[gp][i]; |
616 |
} |
617 |
} |
618 |
} |
619 |
|
620 |
static void |
621 |
djw_compute_prefix_1_2 (djw_prefix *prefix, djw_weight *freq) |
622 |
{ |
623 |
uint8_t clmtf[DJW_MAX_CODELEN+1]; |
624 |
|
625 |
djw_init_clen_mtf_1_2 (clmtf); |
626 |
|
627 |
djw_compute_mtf_1_2 (prefix, clmtf, freq, DJW_MAX_CODELEN+1); |
628 |
} |
629 |
|
630 |
static int |
631 |
djw_encode_prefix (xd3_stream *stream, |
632 |
xd3_output **output, |
633 |
bit_state *bstate, |
634 |
djw_prefix *prefix) |
635 |
{ |
636 |
int ret, i; |
637 |
uint num_to_encode; |
638 |
djw_weight clfreq[DJW_TOTAL_CODES]; |
639 |
uint8_t clclen[DJW_TOTAL_CODES]; |
640 |
uint clcode[DJW_TOTAL_CODES]; |
641 |
|
642 |
IF_TUNE (memset (clfreq, 0, sizeof (clfreq))); |
643 |
|
644 |
/* Move-to-front encode prefix symbols, count frequencies */ |
645 |
djw_compute_prefix_1_2 (prefix, clfreq); |
646 |
|
647 |
/* Compute codes */ |
648 |
djw_build_prefix (clfreq, clclen, DJW_TOTAL_CODES, DJW_MAX_CLCLEN); |
649 |
djw_build_codes (clcode, clclen, DJW_TOTAL_CODES DEBUG_ARG (DJW_MAX_CLCLEN)); |
650 |
|
651 |
/* Compute number of extra codes beyond basic ones for this template. */ |
652 |
num_to_encode = DJW_TOTAL_CODES; |
653 |
while (num_to_encode > DJW_EXTRA_12OFFSET && clclen[num_to_encode-1] == 0) { num_to_encode -= 1; } |
654 |
XD3_ASSERT (num_to_encode - DJW_EXTRA_12OFFSET < (1 << DJW_EXTRA_CODE_BITS)); |
655 |
|
656 |
/* Encode: # of extra codes */ |
657 |
if ((ret = xd3_encode_bits (stream, output, bstate, DJW_EXTRA_CODE_BITS, |
658 |
num_to_encode - DJW_EXTRA_12OFFSET))) { return ret; } |
659 |
|
660 |
/* Encode: MTF code lengths */ |
661 |
for (i = 0; i < num_to_encode; i += 1) |
662 |
{ |
663 |
if ((ret = xd3_encode_bits (stream, output, bstate, DJW_CLCLEN_BITS, clclen[i]))) { return ret; } |
664 |
} |
665 |
|
666 |
/* Encode: CLEN code lengths */ |
667 |
for (i = 0; i < prefix->mcount; i += 1) |
668 |
{ |
669 |
usize_t mtf_sym = prefix->mtfsym[i]; |
670 |
usize_t bits = clclen[mtf_sym]; |
671 |
usize_t code = clcode[mtf_sym]; |
672 |
|
673 |
if ((ret = xd3_encode_bits (stream, output, bstate, bits, code))) { return ret; } |
674 |
} |
675 |
|
676 |
IF_TUNE (memcpy (tune_freq, clfreq, sizeof (clfreq))); |
677 |
|
678 |
return 0; |
679 |
} |
680 |
|
681 |
static void |
682 |
djw_compute_selector_1_2 (djw_prefix *prefix, |
683 |
usize_t groups, |
684 |
djw_weight *gbest_freq) |
685 |
{ |
686 |
uint8_t grmtf[DJW_MAX_GROUPS]; |
687 |
usize_t i; |
688 |
|
689 |
for (i = 0; i < groups; i += 1) { grmtf[i] = i; } |
690 |
|
691 |
djw_compute_mtf_1_2 (prefix, grmtf, gbest_freq, groups); |
692 |
} |
693 |
|
694 |
static int |
695 |
xd3_encode_howmany_groups (xd3_stream *stream, |
696 |
xd3_sec_cfg *cfg, |
697 |
usize_t input_size, |
698 |
usize_t *ret_groups, |
699 |
usize_t *ret_sector_size) |
700 |
{ |
701 |
usize_t cfg_groups = 0; |
702 |
usize_t cfg_sector_size = 0; |
703 |
usize_t sugg_groups = 0; |
704 |
usize_t sugg_sector_size = 0; |
705 |
|
706 |
if (cfg->ngroups != 0) |
707 |
{ |
708 |
if (cfg->ngroups < 0 || cfg->ngroups > DJW_MAX_GROUPS) |
709 |
{ |
710 |
stream->msg = "invalid secondary encoder group number"; |
711 |
return XD3_INTERNAL; |
712 |
} |
713 |
|
714 |
cfg_groups = cfg->ngroups; |
715 |
} |
716 |
|
717 |
if (cfg->sector_size != 0) |
718 |
{ |
719 |
if (cfg->sector_size < DJW_SECTORSZ_MULT || cfg->sector_size > DJW_SECTORSZ_MAX || (cfg->sector_size % DJW_SECTORSZ_MULT) != 0) |
720 |
{ |
721 |
stream->msg = "invalid secondary encoder sector size"; |
722 |
return XD3_INTERNAL; |
723 |
} |
724 |
|
725 |
cfg_sector_size = cfg->sector_size; |
726 |
} |
727 |
|
728 |
if (cfg_groups == 0 || cfg_sector_size == 0) |
729 |
{ |
730 |
/* These values were found empirically using xdelta3-tune around version |
731 |
* xdfs-0.256. */ |
732 |
switch (cfg->data_type) |
733 |
{ |
734 |
case DATA_SECTION: |
735 |
if (input_size < 1000) { sugg_groups = 1; sugg_sector_size = 0; } |
736 |
else if (input_size < 4000) { sugg_groups = 2; sugg_sector_size = 10; } |
737 |
else if (input_size < 7000) { sugg_groups = 3; sugg_sector_size = 10; } |
738 |
else if (input_size < 10000) { sugg_groups = 4; sugg_sector_size = 10; } |
739 |
else if (input_size < 25000) { sugg_groups = 5; sugg_sector_size = 10; } |
740 |
else if (input_size < 50000) { sugg_groups = 7; sugg_sector_size = 20; } |
741 |
else if (input_size < 100000) { sugg_groups = 8; sugg_sector_size = 30; } |
742 |
else { sugg_groups = 8; sugg_sector_size = 70; } |
743 |
break; |
744 |
case INST_SECTION: |
745 |
if (input_size < 7000) { sugg_groups = 1; sugg_sector_size = 0; } |
746 |
else if (input_size < 10000) { sugg_groups = 2; sugg_sector_size = 50; } |
747 |
else if (input_size < 25000) { sugg_groups = 3; sugg_sector_size = 50; } |
748 |
else if (input_size < 50000) { sugg_groups = 6; sugg_sector_size = 40; } |
749 |
else if (input_size < 100000) { sugg_groups = 8; sugg_sector_size = 40; } |
750 |
else { sugg_groups = 8; sugg_sector_size = 40; } |
751 |
break; |
752 |
case ADDR_SECTION: |
753 |
if (input_size < 9000) { sugg_groups = 1; sugg_sector_size = 0; } |
754 |
else if (input_size < 25000) { sugg_groups = 2; sugg_sector_size = 130; } |
755 |
else if (input_size < 50000) { sugg_groups = 3; sugg_sector_size = 130; } |
756 |
else if (input_size < 100000) { sugg_groups = 5; sugg_sector_size = 130; } |
757 |
else { sugg_groups = 7; sugg_sector_size = 130; } |
758 |
break; |
759 |
} |
760 |
|
761 |
if (cfg_groups == 0) |
762 |
{ |
763 |
cfg_groups = sugg_groups; |
764 |
} |
765 |
|
766 |
if (cfg_sector_size == 0) |
767 |
{ |
768 |
cfg_sector_size = sugg_sector_size; |
769 |
} |
770 |
} |
771 |
|
772 |
if (cfg_groups != 1 && cfg_sector_size == 0) |
773 |
{ |
774 |
switch (cfg->data_type) |
775 |
{ |
776 |
case DATA_SECTION: |
777 |
cfg_sector_size = 20; |
778 |
break; |
779 |
case INST_SECTION: |
780 |
cfg_sector_size = 50; |
781 |
break; |
782 |
case ADDR_SECTION: |
783 |
cfg_sector_size = 130; |
784 |
break; |
785 |
} |
786 |
} |
787 |
|
788 |
(*ret_groups) = cfg_groups; |
789 |
(*ret_sector_size) = cfg_sector_size; |
790 |
|
791 |
XD3_ASSERT (cfg_groups > 0 && cfg_groups <= DJW_MAX_GROUPS); |
792 |
XD3_ASSERT (cfg_groups == 1 || (cfg_sector_size >= DJW_SECTORSZ_MULT && cfg_sector_size <= DJW_SECTORSZ_MAX)); |
793 |
|
794 |
return 0; |
795 |
} |
796 |
|
797 |
static int |
798 |
xd3_real_encode_huff (xd3_stream *stream, |
799 |
djw_stream *h, |
800 |
xd3_output *input, |
801 |
xd3_output *output, |
802 |
xd3_sec_cfg *cfg) |
803 |
{ |
804 |
int ret; |
805 |
usize_t groups, sector_size; |
806 |
bit_state bstate = BIT_STATE_ENCODE_INIT; |
807 |
xd3_output *in; |
808 |
int encode_bits; |
809 |
usize_t input_bits; |
810 |
usize_t input_bytes; |
811 |
usize_t initial_offset = output->next; |
812 |
djw_weight real_freq[ALPHABET_SIZE]; |
813 |
uint8_t *gbest = NULL; /* Dynamic allocations: could put these in djw_stream. */ |
814 |
uint8_t *gbest_mtf = NULL; |
815 |
|
816 |
input_bytes = djw_count_freqs (real_freq, input); |
817 |
input_bits = input_bytes * 8; |
818 |
|
819 |
XD3_ASSERT (input_bytes > 0); |
820 |
|
821 |
if ((ret = xd3_encode_howmany_groups (stream, cfg, input_bytes, & groups, & sector_size))) |
822 |
{ |
823 |
return ret; |
824 |
} |
825 |
|
826 |
if (0) |
827 |
{ |
828 |
regroup: |
829 |
/* Sometimes we dynamically decide there are too many groups. Arrive here. */ |
830 |
output->next = initial_offset; |
831 |
xd3_bit_state_encode_init (& bstate); |
832 |
} |
833 |
|
834 |
/* Encode: # of groups (3 bits) */ |
835 |
if ((ret = xd3_encode_bits (stream, & output, & bstate, DJW_GROUP_BITS, groups-1))) { goto failure; } |
836 |
|
837 |
if (groups == 1) |
838 |
{ |
839 |
/* Single Huffman group. */ |
840 |
uint code[ALPHABET_SIZE]; /* Codes */ |
841 |
IF_TUNE (uint8_t *clen = tune_clen[0];) |
842 |
IF_NTUNE (uint8_t clen[ALPHABET_SIZE];) |
843 |
uint8_t prefix_mtfsym[ALPHABET_SIZE]; |
844 |
djw_prefix prefix; |
845 |
|
846 |
encode_bits = |
847 |
djw_build_prefix (real_freq, clen, ALPHABET_SIZE, DJW_MAX_CODELEN); |
848 |
djw_build_codes (code, clen, ALPHABET_SIZE DEBUG_ARG (DJW_MAX_CODELEN)); |
849 |
|
850 |
if (encode_bits + EFFICIENCY_BITS >= input_bits && ! cfg->inefficient) { goto nosecond; } |
851 |
|
852 |
/* Encode: prefix */ |
853 |
prefix.mtfsym = prefix_mtfsym; |
854 |
prefix.symbol = clen; |
855 |
prefix.scount = ALPHABET_SIZE; |
856 |
|
857 |
if ((ret = djw_encode_prefix (stream, & output, & bstate, & prefix))) { goto failure; } |
858 |
|
859 |
if (encode_bits + (8 * output->next) + EFFICIENCY_BITS >= input_bits && ! cfg->inefficient) { goto nosecond; } |
860 |
|
861 |
IF_TUNE (tune_prefix_bits = xd3_bitsof_output (output, & bstate)); |
862 |
IF_TUNE (tune_select_bits = 0); |
863 |
IF_TUNE (tune_encode_bits = encode_bits); |
864 |
|
865 |
/* Encode: data */ |
866 |
for (in = input; in; in = in->next_page) |
867 |
{ |
868 |
const uint8_t *p = in->base; |
869 |
const uint8_t *p_max = p + in->next; |
870 |
|
871 |
do |
872 |
{ |
873 |
usize_t sym = *p++; |
874 |
usize_t bits = clen[sym]; |
875 |
|
876 |
IF_DEBUG (encode_bits -= bits); |
877 |
|
878 |
if ((ret = xd3_encode_bits (stream, & output, & bstate, bits, code[sym]))) { goto failure; } |
879 |
} |
880 |
while (p < p_max); |
881 |
} |
882 |
|
883 |
XD3_ASSERT (encode_bits == 0); |
884 |
} |
885 |
else |
886 |
{ |
887 |
/* DJW Huffman */ |
888 |
djw_weight evolve_freq[DJW_MAX_GROUPS][ALPHABET_SIZE]; |
889 |
#if TUNE_HUFFMAN == 0 |
890 |
uint8_t evolve_clen[DJW_MAX_GROUPS][ALPHABET_SIZE]; |
891 |
#else |
892 |
#define evolve_clen tune_clen |
893 |
#endif |
894 |
djw_weight left = input_bytes; |
895 |
int gp; |
896 |
int niter = 0; |
897 |
usize_t select_bits; |
898 |
usize_t sym1 = 0, sym2 = 0, s; |
899 |
usize_t gcost[DJW_MAX_GROUPS]; |
900 |
uint gbest_code[DJW_MAX_GROUPS+2]; |
901 |
uint8_t gbest_clen[DJW_MAX_GROUPS+2]; |
902 |
usize_t gbest_max = 1 + (input_bytes - 1) / sector_size; |
903 |
int best_bits = 0; |
904 |
usize_t gbest_no; |
905 |
usize_t gpcnt; |
906 |
const uint8_t *p; |
907 |
IF_DEBUG1 (usize_t gcount[DJW_MAX_GROUPS]); |
908 |
|
909 |
/* Encode: sector size (5 bits) */ |
910 |
if ((ret = xd3_encode_bits (stream, & output, & bstate, |
911 |
DJW_SECTORSZ_BITS, (sector_size/DJW_SECTORSZ_MULT)-1))) { goto failure; } |
912 |
|
913 |
/* Dynamic allocation. */ |
914 |
if (gbest == NULL) |
915 |
{ |
916 |
if ((gbest = xd3_alloc (stream, gbest_max, 1)) == NULL) { ret = ENOMEM; goto failure; } |
917 |
} |
918 |
|
919 |
if (gbest_mtf == NULL) |
920 |
{ |
921 |
if ((gbest_mtf = xd3_alloc (stream, gbest_max, 1)) == NULL) { ret = ENOMEM; goto failure; } |
922 |
} |
923 |
|
924 |
/* OPT: Some of the inner loops can be optimized, as shown in bzip2 */ |
925 |
|
926 |
/* Generate initial code length tables. */ |
927 |
for (gp = 0; gp < groups; gp += 1) |
928 |
{ |
929 |
djw_weight sum = 0; |
930 |
djw_weight goal = left / (groups - gp); |
931 |
|
932 |
IF_DEBUG1 (usize_t nz = 0); |
933 |
|
934 |
/* Due to the single-code granularity of this distribution, it may be that we |
935 |
* can't generate a distribution for each group. In that case subtract one |
936 |
* group and try again. If (inefficient), we're testing group behavior, so |
937 |
* don't mess things up. */ |
938 |
if (goal == 0 && !cfg->inefficient) |
939 |
{ |
940 |
IF_DEBUG1 (DP(RINT "too many groups (%u), dropping one\n", groups)); |
941 |
groups -= 1; |
942 |
goto regroup; |
943 |
} |
944 |
|
945 |
/* Sum == goal is possible when (cfg->inefficient)... */ |
946 |
while (sum < goal) |
947 |
{ |
948 |
XD3_ASSERT (sym2 < ALPHABET_SIZE); |
949 |
IF_DEBUG1 (nz += real_freq[sym2] != 0); |
950 |
sum += real_freq[sym2++]; |
951 |
} |
952 |
|
953 |
IF_DEBUG1(DP(RINT "group %u has symbols %u..%u (%u non-zero) (%u/%u = %.3f)\n", |
954 |
gp, sym1, sym2, nz, sum, input_bytes, sum / (double)input_bytes);); |
955 |
|
956 |
for (s = 0; s < ALPHABET_SIZE; s += 1) |
957 |
{ |
958 |
evolve_clen[gp][s] = (s >= sym1 && s <= sym2) ? 1 : 16; |
959 |
} |
960 |
|
961 |
left -= sum; |
962 |
sym1 = sym2+1; |
963 |
} |
964 |
|
965 |
repeat: |
966 |
|
967 |
niter += 1; |
968 |
gbest_no = 0; |
969 |
memset (evolve_freq, 0, sizeof (evolve_freq[0]) * groups); |
970 |
IF_DEBUG1 (memset (gcount, 0, sizeof (gcount[0]) * groups)); |
971 |
|
972 |
/* For each input page (loop is irregular to allow non-pow2-size group size. */ |
973 |
in = input; |
974 |
p = in->base; |
975 |
|
976 |
/* For each group-size sector. */ |
977 |
do |
978 |
{ |
979 |
const uint8_t *p0 = p; |
980 |
xd3_output *in0 = in; |
981 |
usize_t best = 0; |
982 |
usize_t winner = 0; |
983 |
|
984 |
/* Select best group for each sector, update evolve_freq. */ |
985 |
memset (gcost, 0, sizeof (gcost[0]) * groups); |
986 |
|
987 |
/* For each byte in sector. */ |
988 |
for (gpcnt = 0; gpcnt < sector_size; gpcnt += 1) |
989 |
{ |
990 |
/* For each group. */ |
991 |
for (gp = 0; gp < groups; gp += 1) |
992 |
{ |
993 |
gcost[gp] += evolve_clen[gp][*p]; |
994 |
} |
995 |
|
996 |
/* Check end-of-input-page. */ |
997 |
# define GP_PAGE() \ |
998 |
if (++p - in->base == in->next) \ |
999 |
{ \ |
1000 |
in = in->next_page; \ |
1001 |
if (in == NULL) { break; } \ |
1002 |
p = in->base; \ |
1003 |
} |
1004 |
|
1005 |
GP_PAGE (); |
1006 |
} |
1007 |
|
1008 |
/* Find min cost group for this sector */ |
1009 |
best = -1U; |
1010 |
for (gp = 0; gp < groups; gp += 1) |
1011 |
{ |
1012 |
if (gcost[gp] < best) { best = gcost[gp]; winner = gp; } |
1013 |
} |
1014 |
|
1015 |
XD3_ASSERT(gbest_no < gbest_max); |
1016 |
gbest[gbest_no++] = winner; |
1017 |
IF_DEBUG1 (gcount[winner] += 1); |
1018 |
|
1019 |
p = p0; |
1020 |
in = in0; |
1021 |
|
1022 |
/* Update group frequencies. */ |
1023 |
for (gpcnt = 0; gpcnt < sector_size; gpcnt += 1) |
1024 |
{ |
1025 |
evolve_freq[winner][*p] += 1; |
1026 |
|
1027 |
GP_PAGE (); |
1028 |
} |
1029 |
} |
1030 |
while (in != NULL); |
1031 |
|
1032 |
XD3_ASSERT (gbest_no == gbest_max); |
1033 |
|
1034 |
/* Recompute code lengths. */ |
1035 |
encode_bits = 0; |
1036 |
for (gp = 0; gp < groups; gp += 1) |
1037 |
{ |
1038 |
int i; |
1039 |
uint8_t evolve_zero[ALPHABET_SIZE]; |
1040 |
int any_zeros = 0; |
1041 |
|
1042 |
memset (evolve_zero, 0, sizeof (evolve_zero)); |
1043 |
|
1044 |
/* Cannot allow a zero clen when the real frequency is non-zero. Note: this |
1045 |
* means we are going to encode a fairly long code for these unused entries. An |
1046 |
* improvement would be to implement a NOTUSED code for when these are actually |
1047 |
* zero, but this requires another data structure (evolve_zero) since we don't |
1048 |
* know when evolve_freq[i] == 0... Briefly tested, looked worse. */ |
1049 |
for (i = 0; i < ALPHABET_SIZE; i += 1) |
1050 |
{ |
1051 |
if (evolve_freq[gp][i] == 0 && real_freq[i] != 0) |
1052 |
{ |
1053 |
evolve_freq[gp][i] = 1; |
1054 |
evolve_zero[i] = 1; |
1055 |
any_zeros = 1; |
1056 |
} |
1057 |
} |
1058 |
|
1059 |
encode_bits += djw_build_prefix (evolve_freq[gp], evolve_clen[gp], ALPHABET_SIZE, DJW_MAX_CODELEN); |
1060 |
|
1061 |
/* The above faking of frequencies does not matter for the last iteration, but |
1062 |
* we don't know when that is yet. However, it also breaks the encode_bits |
1063 |
* computation. Necessary for accuracy, and for the (encode_bits==0) assert |
1064 |
* after all bits are output. */ |
1065 |
if (any_zeros) |
1066 |
{ |
1067 |
IF_DEBUG1 (usize_t save_total = encode_bits); |
1068 |
|
1069 |
for (i = 0; i < ALPHABET_SIZE; i += 1) |
1070 |
{ |
1071 |
if (evolve_zero[i]) { encode_bits -= evolve_clen[gp][i]; } |
1072 |
} |
1073 |
|
1074 |
IF_DEBUG1 (DP(RINT "evolve_zero reduced %u bits in group %u\n", save_total - encode_bits, gp)); |
1075 |
} |
1076 |
} |
1077 |
|
1078 |
IF_DEBUG1( |
1079 |
DP(RINT "pass %u total bits: %u group uses: ", niter, encode_bits); |
1080 |
for (gp = 0; gp < groups; gp += 1) { DP(RINT "%u ", gcount[gp]); } |
1081 |
DP(RINT "\n");); |
1082 |
|
1083 |
/* End iteration. (The following assertion proved invalid.) */ |
1084 |
/*XD3_ASSERT (niter == 1 || best_bits >= encode_bits);*/ |
1085 |
|
1086 |
IF_DEBUG1 (if (niter > 1 && best_bits < encode_bits) { |
1087 |
DP(RINT "iteration lost %u bits\n", encode_bits - best_bits); }); |
1088 |
|
1089 |
if (niter == 1 || (niter < DJW_MAX_ITER && (best_bits - encode_bits) >= DJW_MIN_IMPROVEMENT)) |
1090 |
{ |
1091 |
best_bits = encode_bits; |
1092 |
goto repeat; |
1093 |
} |
1094 |
|
1095 |
/* Efficiency check. */ |
1096 |
if (encode_bits + EFFICIENCY_BITS >= input_bits && ! cfg->inefficient) { goto nosecond; } |
1097 |
|
1098 |
IF_DEBUG1 (DP(RINT "djw compression: %u -> %0.3f\n", input_bytes, encode_bits / 8.0)); |
1099 |
|
1100 |
/* Encode: prefix */ |
1101 |
{ |
1102 |
uint8_t prefix_symbol[DJW_MAX_GROUPS * ALPHABET_SIZE]; |
1103 |
uint8_t prefix_mtfsym[DJW_MAX_GROUPS * ALPHABET_SIZE]; |
1104 |
uint8_t prefix_repcnt[DJW_MAX_GROUPS * ALPHABET_SIZE]; |
1105 |
djw_prefix prefix; |
1106 |
|
1107 |
prefix.symbol = prefix_symbol; |
1108 |
prefix.mtfsym = prefix_mtfsym; |
1109 |
prefix.repcnt = prefix_repcnt; |
1110 |
|
1111 |
djw_compute_multi_prefix (groups, evolve_clen, & prefix); |
1112 |
if ((ret = djw_encode_prefix (stream, & output, & bstate, & prefix))) { goto failure; } |
1113 |
} |
1114 |
|
1115 |
/* Encode: selector frequencies */ |
1116 |
{ |
1117 |
djw_weight gbest_freq[DJW_MAX_GROUPS+1]; |
1118 |
djw_prefix gbest_prefix; |
1119 |
usize_t i; |
1120 |
|
1121 |
gbest_prefix.scount = gbest_no; |
1122 |
gbest_prefix.symbol = gbest; |
1123 |
gbest_prefix.mtfsym = gbest_mtf; |
1124 |
|
1125 |
djw_compute_selector_1_2 (& gbest_prefix, groups, gbest_freq); |
1126 |
|
1127 |
select_bits = |
1128 |
djw_build_prefix (gbest_freq, gbest_clen, groups+1, DJW_MAX_GBCLEN); |
1129 |
djw_build_codes (gbest_code, gbest_clen, groups+1 DEBUG_ARG (DJW_MAX_GBCLEN)); |
1130 |
|
1131 |
IF_TUNE (tune_prefix_bits = xd3_bitsof_output (output, & bstate)); |
1132 |
IF_TUNE (tune_select_bits = select_bits); |
1133 |
IF_TUNE (tune_encode_bits = encode_bits); |
1134 |
|
1135 |
for (i = 0; i < groups+1; i += 1) |
1136 |
{ |
1137 |
if ((ret = xd3_encode_bits (stream, & output, & bstate, DJW_GBCLEN_BITS, gbest_clen[i]))) { goto failure; } |
1138 |
} |
1139 |
|
1140 |
for (i = 0; i < gbest_prefix.mcount; i += 1) |
1141 |
{ |
1142 |
usize_t gp_mtf = gbest_mtf[i]; |
1143 |
usize_t gp_sel_bits = gbest_clen[gp_mtf]; |
1144 |
usize_t gp_sel_code = gbest_code[gp_mtf]; |
1145 |
|
1146 |
XD3_ASSERT (gp_mtf < groups+1); |
1147 |
|
1148 |
if ((ret = xd3_encode_bits (stream, & output, & bstate, gp_sel_bits, gp_sel_code))) { goto failure; } |
1149 |
|
1150 |
IF_DEBUG (select_bits -= gp_sel_bits); |
1151 |
} |
1152 |
|
1153 |
XD3_ASSERT (select_bits == 0); |
1154 |
} |
1155 |
|
1156 |
/* Efficiency check. */ |
1157 |
if (encode_bits + select_bits + (8 * output->next) + EFFICIENCY_BITS >= input_bits && ! cfg->inefficient) { goto nosecond; } |
1158 |
|
1159 |
/* Encode: data */ |
1160 |
{ |
1161 |
uint evolve_code[DJW_MAX_GROUPS][ALPHABET_SIZE]; |
1162 |
usize_t sector = 0; |
1163 |
|
1164 |
/* Build code tables for each group. */ |
1165 |
for (gp = 0; gp < groups; gp += 1) |
1166 |
{ |
1167 |
djw_build_codes (evolve_code[gp], evolve_clen[gp], ALPHABET_SIZE DEBUG_ARG (DJW_MAX_CODELEN)); |
1168 |
} |
1169 |
|
1170 |
/* Now loop over the input. */ |
1171 |
in = input; |
1172 |
p = in->base; |
1173 |
|
1174 |
do |
1175 |
{ |
1176 |
/* For each sector. */ |
1177 |
usize_t gp_best = gbest[sector]; |
1178 |
uint *gp_codes = evolve_code[gp_best]; |
1179 |
uint8_t *gp_clens = evolve_clen[gp_best]; |
1180 |
|
1181 |
XD3_ASSERT (sector < gbest_no); |
1182 |
|
1183 |
sector += 1; |
1184 |
|
1185 |
/* Encode the sector data. */ |
1186 |
for (gpcnt = 0; gpcnt < sector_size; gpcnt += 1) |
1187 |
{ |
1188 |
usize_t sym = *p; |
1189 |
usize_t bits = gp_clens[sym]; |
1190 |
usize_t code = gp_codes[sym]; |
1191 |
|
1192 |
IF_DEBUG (encode_bits -= bits); |
1193 |
|
1194 |
if ((ret = xd3_encode_bits (stream, & output, & bstate, bits, code))) { goto failure; } |
1195 |
|
1196 |
GP_PAGE (); |
1197 |
} |
1198 |
} |
1199 |
while (in != NULL); |
1200 |
|
1201 |
XD3_ASSERT (select_bits == 0); |
1202 |
XD3_ASSERT (encode_bits == 0); |
1203 |
|
1204 |
#undef evolve_clen |
1205 |
} |
1206 |
} |
1207 |
|
1208 |
ret = xd3_flush_bits (stream, & output, & bstate); |
1209 |
|
1210 |
if (0) |
1211 |
{ |
1212 |
nosecond: |
1213 |
stream->msg = "secondary compression was inefficient"; |
1214 |
ret = XD3_NOSECOND; |
1215 |
} |
1216 |
|
1217 |
failure: |
1218 |
|
1219 |
xd3_free (stream, gbest); |
1220 |
xd3_free (stream, gbest_mtf); |
1221 |
return ret; |
1222 |
} |
1223 |
#endif /* XD3_ENCODER */ |
1224 |
|
1225 |
/*********************************************************************/ |
1226 |
/* DECODE */ |
1227 |
/*********************************************************************/ |
1228 |
|
1229 |
static void |
1230 |
djw_build_decoder (xd3_stream *stream, |
1231 |
usize_t asize, |
1232 |
usize_t abs_max, |
1233 |
const uint8_t *clen, |
1234 |
uint8_t *inorder, |
1235 |
uint *base, |
1236 |
uint *limit, |
1237 |
uint *min_clenp, |
1238 |
uint *max_clenp) |
1239 |
{ |
1240 |
int i, l; |
1241 |
const uint8_t *ci; |
1242 |
uint nr_clen [DJW_MAX_CODELEN+2]; |
1243 |
uint tmp_base[DJW_MAX_CODELEN+2]; |
1244 |
int min_clen; |
1245 |
int max_clen; |
1246 |
|
1247 |
/* Assumption: the two temporary arrays are large enough to hold abs_max. */ |
1248 |
XD3_ASSERT (abs_max <= DJW_MAX_CODELEN); |
1249 |
|
1250 |
/* This looks something like the start of zlib's inftrees.c */ |
1251 |
memset (nr_clen, 0, sizeof (nr_clen[0]) * (abs_max+1)); |
1252 |
|
1253 |
/* Count number of each code length */ |
1254 |
i = asize; |
1255 |
ci = clen; |
1256 |
do |
1257 |
{ |
1258 |
/* Caller _must_ check that values are in-range. Most of the time |
1259 |
* the caller decodes a specific number of bits, which imply the max value, and the |
1260 |
* other time the caller decodes a huffman value, which must be in-range. Therefore, |
1261 |
* its an assertion and this function cannot otherwise fail. */ |
1262 |
XD3_ASSERT (*ci <= abs_max); |
1263 |
|
1264 |
nr_clen[*ci++]++; |
1265 |
} |
1266 |
while (--i != 0); |
1267 |
|
1268 |
/* Compute min, max. */ |
1269 |
for (i = 1; i <= abs_max; i += 1) { if (nr_clen[i]) { break; } } |
1270 |
min_clen = i; |
1271 |
for (i = abs_max; i != 0; i -= 1) { if (nr_clen[i]) { break; } } |
1272 |
max_clen = i; |
1273 |
|
1274 |
/* Fill the BASE, LIMIT table. */ |
1275 |
tmp_base[min_clen] = 0; |
1276 |
base[min_clen] = 0; |
1277 |
limit[min_clen] = nr_clen[min_clen] - 1; |
1278 |
for (i = min_clen + 1; i <= max_clen; i += 1) |
1279 |
{ |
1280 |
uint last_limit = ((limit[i-1] + 1) << 1); |
1281 |
tmp_base[i] = tmp_base[i-1] + nr_clen[i-1]; |
1282 |
limit[i] = last_limit + nr_clen[i] - 1; |
1283 |
base[i] = last_limit - tmp_base[i]; |
1284 |
} |
1285 |
|
1286 |
/* Fill the inorder array, canonically ordered codes. */ |
1287 |
ci = clen; |
1288 |
for (i = 0; i < asize; i += 1) |
1289 |
{ |
1290 |
if ((l = *ci++) != 0) |
1291 |
{ |
1292 |
inorder[tmp_base[l]++] = i; |
1293 |
} |
1294 |
} |
1295 |
|
1296 |
*min_clenp = min_clen; |
1297 |
*max_clenp = max_clen; |
1298 |
} |
1299 |
|
1300 |
static INLINE int |
1301 |
djw_decode_symbol (xd3_stream *stream, |
1302 |
bit_state *bstate, |
1303 |
const uint8_t **input, |
1304 |
const uint8_t *input_end, |
1305 |
const uint8_t *inorder, |
1306 |
const uint *base, |
1307 |
const uint *limit, |
1308 |
uint min_clen, |
1309 |
uint max_clen, |
1310 |
usize_t *sym, |
1311 |
usize_t max_sym) |
1312 |
{ |
1313 |
usize_t code = 0; |
1314 |
usize_t bits = 0; |
1315 |
|
1316 |
/* OPT: Supposedly a small lookup table improves speed here... */ |
1317 |
|
1318 |
/* Code outline is similar to xd3_decode_bits... */ |
1319 |
if (bstate->cur_mask == 0x100) { goto next_byte; } |
1320 |
|
1321 |
for (;;) |
1322 |
{ |
1323 |
do |
1324 |
{ |
1325 |
if (bits == max_clen) { goto corrupt; } |
1326 |
|
1327 |
bits += 1; |
1328 |
code = (code << 1); |
1329 |
|
1330 |
if (bstate->cur_byte & bstate->cur_mask) { code |= 1; } |
1331 |
|
1332 |
bstate->cur_mask <<= 1; |
1333 |
|
1334 |
if (bits >= min_clen && code <= limit[bits]) { goto done; } |
1335 |
} |
1336 |
while (bstate->cur_mask != 0x100); |
1337 |
|
1338 |
next_byte: |
1339 |
|
1340 |
if (*input == input_end) |
1341 |
{ |
1342 |
stream->msg = "secondary decoder end of input"; |
1343 |
return XD3_INTERNAL; |
1344 |
} |
1345 |
|
1346 |
bstate->cur_byte = *(*input)++; |
1347 |
bstate->cur_mask = 1; |
1348 |
} |
1349 |
|
1350 |
done: |
1351 |
|
1352 |
if (base[bits] <= code) |
1353 |
{ |
1354 |
usize_t offset = code - base[bits]; |
1355 |
|
1356 |
if (offset <= max_sym) |
1357 |
{ |
1358 |
IF_DEBUG2 (DP(RINT "(j) %u ", code)); |
1359 |
*sym = inorder[offset]; |
1360 |
return 0; |
1361 |
} |
1362 |
} |
1363 |
|
1364 |
corrupt: |
1365 |
stream->msg = "secondary decoder invalid code"; |
1366 |
return XD3_INTERNAL; |
1367 |
} |
1368 |
|
1369 |
static int |
1370 |
djw_decode_clclen (xd3_stream *stream, |
1371 |
bit_state *bstate, |
1372 |
const uint8_t **input, |
1373 |
const uint8_t *input_end, |
1374 |
uint8_t *cl_inorder, |
1375 |
uint *cl_base, |
1376 |
uint *cl_limit, |
1377 |
uint *cl_minlen, |
1378 |
uint *cl_maxlen, |
1379 |
uint8_t *cl_mtf) |
1380 |
{ |
1381 |
int ret; |
1382 |
uint8_t cl_clen[DJW_TOTAL_CODES]; |
1383 |
usize_t num_codes, value; |
1384 |
int i; |
1385 |
|
1386 |
/* How many extra code lengths to encode. */ |
1387 |
if ((ret = xd3_decode_bits (stream, bstate, input, input_end, DJW_EXTRA_CODE_BITS, & num_codes))) { return ret; } |
1388 |
|
1389 |
num_codes += DJW_EXTRA_12OFFSET; |
1390 |
|
1391 |
/* Read num_codes. */ |
1392 |
for (i = 0; i < num_codes; i += 1) |
1393 |
{ |
1394 |
if ((ret = xd3_decode_bits (stream, bstate, input, input_end, DJW_CLCLEN_BITS, & value))) { return ret; } |
1395 |
|
1396 |
cl_clen[i] = value; |
1397 |
} |
1398 |
|
1399 |
/* Set the rest to zero. */ |
1400 |
for (; i < DJW_TOTAL_CODES; i += 1) { cl_clen[i] = 0; } |
1401 |
|
1402 |
/* No need to check for in-range clen values, because: */ |
1403 |
XD3_ASSERT (1 << DJW_CLCLEN_BITS == DJW_MAX_CLCLEN + 1); |
1404 |
|
1405 |
/* Build the code-length decoder. */ |
1406 |
djw_build_decoder (stream, DJW_TOTAL_CODES, DJW_MAX_CLCLEN, |
1407 |
cl_clen, cl_inorder, cl_base, cl_limit, cl_minlen, cl_maxlen); |
1408 |
|
1409 |
/* Initialize the MTF state. */ |
1410 |
djw_init_clen_mtf_1_2 (cl_mtf); |
1411 |
|
1412 |
return 0; |
1413 |
} |
1414 |
|
1415 |
static INLINE int |
1416 |
djw_decode_1_2 (xd3_stream *stream, |
1417 |
bit_state *bstate, |
1418 |
const uint8_t **input, |
1419 |
const uint8_t *input_end, |
1420 |
const uint8_t *inorder, |
1421 |
const uint *base, |
1422 |
const uint *limit, |
1423 |
const uint *minlen, |
1424 |
const uint *maxlen, |
1425 |
uint8_t *mtfvals, |
1426 |
usize_t elts, |
1427 |
usize_t skip_offset, |
1428 |
uint8_t *values) |
1429 |
{ |
1430 |
usize_t n = 0, rep = 0, mtf = 0, s = 0; |
1431 |
int ret; |
1432 |
|
1433 |
while (n < elts) |
1434 |
{ |
1435 |
/* Special case inside generic code: CLEN only: If not the first group, we already |
1436 |
* know the zero frequencies. */ |
1437 |
if (skip_offset != 0 && n >= skip_offset && values[n-skip_offset] == 0) |
1438 |
{ |
1439 |
values[n++] = 0; |
1440 |
continue; |
1441 |
} |
1442 |
|
1443 |
/* Repeat last symbol. */ |
1444 |
if (rep != 0) |
1445 |
{ |
1446 |
values[n++] = mtfvals[0]; |
1447 |
rep -= 1; |
1448 |
continue; |
1449 |
} |
1450 |
|
1451 |
/* Symbol following last repeat code. */ |
1452 |
if (mtf != 0) |
1453 |
{ |
1454 |
usize_t sym = djw_update_mtf (mtfvals, mtf); |
1455 |
values[n++] = sym; |
1456 |
mtf = 0; |
1457 |
continue; |
1458 |
} |
1459 |
|
1460 |
/* Decode next symbol/repeat code. */ |
1461 |
if ((ret = djw_decode_symbol (stream, bstate, input, input_end, |
1462 |
inorder, base, limit, *minlen, *maxlen, |
1463 |
& mtf, DJW_TOTAL_CODES))) { return ret; } |
1464 |
|
1465 |
if (mtf <= RUN_1) |
1466 |
{ |
1467 |
/* Repetition. */ |
1468 |
rep = ((mtf + 1) << s); |
1469 |
mtf = 0; |
1470 |
s += 1; |
1471 |
} |
1472 |
else |
1473 |
{ |
1474 |
/* Remove the RUN_1 MTF offset. */ |
1475 |
mtf -= 1; |
1476 |
s = 0; |
1477 |
} |
1478 |
} |
1479 |
|
1480 |
/* If (rep != 0) there were too many codes received. */ |
1481 |
if (rep != 0) |
1482 |
{ |
1483 |
stream->msg = "secondary decoder invalid repeat code"; |
1484 |
return XD3_INTERNAL; |
1485 |
} |
1486 |
|
1487 |
return 0; |
1488 |
} |
1489 |
|
1490 |
static INLINE int |
1491 |
djw_decode_prefix (xd3_stream *stream, |
1492 |
bit_state *bstate, |
1493 |
const uint8_t **input, |
1494 |
const uint8_t *input_end, |
1495 |
const uint8_t *cl_inorder, |
1496 |
const uint *cl_base, |
1497 |
const uint *cl_limit, |
1498 |
const uint *cl_minlen, |
1499 |
const uint *cl_maxlen, |
1500 |
uint8_t *cl_mtf, |
1501 |
usize_t groups, |
1502 |
uint8_t *clen) |
1503 |
{ |
1504 |
return djw_decode_1_2 (stream, bstate, input, input_end, |
1505 |
cl_inorder, cl_base, cl_limit, cl_minlen, cl_maxlen, cl_mtf, |
1506 |
ALPHABET_SIZE * groups, ALPHABET_SIZE, clen); |
1507 |
} |
1508 |
|
1509 |
static int |
1510 |
xd3_decode_huff (xd3_stream *stream, |
1511 |
djw_stream *h, |
1512 |
const uint8_t **input_pos, |
1513 |
const uint8_t *const input_end, |
1514 |
uint8_t **output_pos, |
1515 |
const uint8_t *const output_end) |
1516 |
{ |
1517 |
const uint8_t *input = *input_pos; |
1518 |
uint8_t *output = *output_pos; |
1519 |
bit_state bstate = BIT_STATE_DECODE_INIT; |
1520 |
uint8_t *sel_group = NULL; |
1521 |
usize_t groups, gp; |
1522 |
usize_t output_bytes = (output_end - output); |
1523 |
usize_t sector_size; |
1524 |
usize_t sectors; |
1525 |
int ret; |
1526 |
|
1527 |
/* Invalid input. */ |
1528 |
if (output_bytes == 0) |
1529 |
{ |
1530 |
stream->msg = "secondary decoder invalid input"; |
1531 |
return XD3_INTERNAL; |
1532 |
} |
1533 |
|
1534 |
/* Decode: number of groups */ |
1535 |
if ((ret = xd3_decode_bits (stream, & bstate, & input, input_end, DJW_GROUP_BITS, & groups))) { goto fail; } |
1536 |
|
1537 |
groups += 1; |
1538 |
|
1539 |
if (groups > 1) |
1540 |
{ |
1541 |
/* Decode: group size */ |
1542 |
if ((ret = xd3_decode_bits (stream, & bstate, & input, input_end, DJW_SECTORSZ_BITS, & sector_size))) { goto fail; } |
1543 |
|
1544 |
sector_size = (sector_size + 1) * DJW_SECTORSZ_MULT; |
1545 |
} |
1546 |
else |
1547 |
{ |
1548 |
/* Default for groups == 1 */ |
1549 |
sector_size = output_bytes; |
1550 |
} |
1551 |
|
1552 |
sectors = 1 + (output_bytes - 1) / sector_size; |
1553 |
|
1554 |
/* @!@ In the case of groups==1, lots of extra stack space gets used here. Could |
1555 |
* dynamically allocate this memory, which would help with excess parameter passing, |
1556 |
* too. Passing too many parameters in this file, simplify it! */ |
1557 |
|
1558 |
/* Outer scope: per-group symbol decoder tables. */ |
1559 |
{ |
1560 |
uint8_t inorder[DJW_MAX_GROUPS][ALPHABET_SIZE]; |
1561 |
uint base [DJW_MAX_GROUPS][DJW_MAX_CODELEN+2]; |
1562 |
uint limit [DJW_MAX_GROUPS][DJW_MAX_CODELEN+2]; |
1563 |
uint minlen [DJW_MAX_GROUPS]; |
1564 |
uint maxlen [DJW_MAX_GROUPS]; |
1565 |
|
1566 |
/* Nested scope: code length decoder tables. */ |
1567 |
{ |
1568 |
uint8_t clen [DJW_MAX_GROUPS][ALPHABET_SIZE]; |
1569 |
uint8_t cl_inorder[DJW_TOTAL_CODES]; |
1570 |
uint cl_base [DJW_MAX_CLCLEN+2]; |
1571 |
uint cl_limit [DJW_MAX_CLCLEN+2]; |
1572 |
uint8_t cl_mtf [DJW_TOTAL_CODES]; |
1573 |
uint cl_minlen; |
1574 |
uint cl_maxlen; |
1575 |
|
1576 |
/* Compute the code length decoder. */ |
1577 |
if ((ret = djw_decode_clclen (stream, & bstate, & input, input_end, |
1578 |
cl_inorder, cl_base, cl_limit, & cl_minlen, |
1579 |
& cl_maxlen, cl_mtf))) { goto fail; } |
1580 |
|
1581 |
/* Now decode each group decoder. */ |
1582 |
if ((ret = djw_decode_prefix (stream, & bstate, & input, input_end, |
1583 |
cl_inorder, cl_base, cl_limit, |
1584 |
& cl_minlen, & cl_maxlen, cl_mtf, |
1585 |
groups, clen[0]))) { goto fail; } |
1586 |
|
1587 |
/* Prepare the actual decoding tables. */ |
1588 |
for (gp = 0; gp < groups; gp += 1) |
1589 |
{ |
1590 |
djw_build_decoder (stream, ALPHABET_SIZE, DJW_MAX_CODELEN, |
1591 |
clen[gp], inorder[gp], base[gp], limit[gp], |
1592 |
& minlen[gp], & maxlen[gp]); |
1593 |
} |
1594 |
} |
1595 |
|
1596 |
/* Decode: selector clens. */ |
1597 |
{ |
1598 |
uint8_t sel_inorder[DJW_MAX_GROUPS+2]; |
1599 |
uint sel_base [DJW_MAX_GBCLEN+2]; |
1600 |
uint sel_limit [DJW_MAX_GBCLEN+2]; |
1601 |
uint8_t sel_mtf [DJW_MAX_GROUPS+2]; |
1602 |
uint sel_minlen; |
1603 |
uint sel_maxlen; |
1604 |
|
1605 |
/* Setup group selection. */ |
1606 |
if (groups > 1) |
1607 |
{ |
1608 |
uint8_t sel_clen[DJW_MAX_GROUPS+1]; |
1609 |
|
1610 |
for (gp = 0; gp < groups+1; gp += 1) |
1611 |
{ |
1612 |
usize_t value; |
1613 |
|
1614 |
if ((ret = xd3_decode_bits (stream, & bstate, & input, input_end, DJW_GBCLEN_BITS, & value))) { goto fail; } |
1615 |
|
1616 |
sel_clen[gp] = value; |
1617 |
sel_mtf[gp] = gp; |
1618 |
} |
1619 |
|
1620 |
if ((sel_group = xd3_alloc (stream, sectors, 1)) == NULL) { ret = ENOMEM; goto fail; } |
1621 |
|
1622 |
djw_build_decoder (stream, groups+1, DJW_MAX_GBCLEN, sel_clen, |
1623 |
sel_inorder, sel_base, sel_limit, & sel_minlen, & sel_maxlen); |
1624 |
|
1625 |
if ((ret = djw_decode_1_2 (stream, & bstate, & input, input_end, |
1626 |
sel_inorder, sel_base, sel_limit, & sel_minlen, & sel_maxlen, sel_mtf, |
1627 |
sectors, 0, sel_group))) { goto fail; } |
1628 |
} |
1629 |
|
1630 |
/* Now decode each sector. */ |
1631 |
{ |
1632 |
uint8_t *gp_inorder = inorder[0]; /* Initialize for (groups==1) case. */ |
1633 |
uint *gp_base = base[0]; |
1634 |
uint *gp_limit = limit[0]; |
1635 |
uint gp_minlen = minlen[0]; |
1636 |
uint gp_maxlen = maxlen[0]; |
1637 |
usize_t c; |
1638 |
|
1639 |
for (c = 0; c < sectors; c += 1) |
1640 |
{ |
1641 |
usize_t n; |
1642 |
|
1643 |
if (groups >= 2) |
1644 |
{ |
1645 |
gp = sel_group[c]; |
1646 |
|
1647 |
XD3_ASSERT (gp < groups); |
1648 |
|
1649 |
gp_inorder = inorder[gp]; |
1650 |
gp_base = base[gp]; |
1651 |
gp_limit = limit[gp]; |
1652 |
gp_minlen = minlen[gp]; |
1653 |
gp_maxlen = maxlen[gp]; |
1654 |
} |
1655 |
|
1656 |
XD3_ASSERT (output_end - output > 0); |
1657 |
|
1658 |
/* Decode next sector. */ |
1659 |
n = min (sector_size, (usize_t) (output_end - output)); |
1660 |
|
1661 |
do |
1662 |
{ |
1663 |
usize_t sym; |
1664 |
|
1665 |
if ((ret = djw_decode_symbol (stream, & bstate, & input, input_end, |
1666 |
gp_inorder, gp_base, gp_limit, gp_minlen, gp_maxlen, |
1667 |
& sym, ALPHABET_SIZE))) { goto fail; } |
1668 |
|
1669 |
*output++ = sym; |
1670 |
} |
1671 |
while (--n); |
1672 |
} |
1673 |
} |
1674 |
} |
1675 |
} |
1676 |
|
1677 |
IF_REGRESSION (if ((ret = xd3_test_clean_bits (stream, & bstate))) { goto fail; }); |
1678 |
XD3_ASSERT (ret == 0); |
1679 |
|
1680 |
fail: |
1681 |
xd3_free (stream, sel_group); |
1682 |
|
1683 |
(*input_pos) = input; |
1684 |
(*output_pos) = output; |
1685 |
return ret; |
1686 |
} |
1687 |
|
1688 |
/*********************************************************************/ |
1689 |
/* TUNING */ |
1690 |
/*********************************************************************/ |
1691 |
|
1692 |
#if TUNE_HUFFMAN && XD3_ENCODER |
1693 |
#include <stdio.h> |
1694 |
#include "xdelta3-fgk.h" |
1695 |
|
1696 |
static uint |
1697 |
xd3_bitsof_output (xd3_output *output, bit_state *bstate) |
1698 |
{ |
1699 |
uint x = 0; |
1700 |
uint m = bstate->cur_mask; |
1701 |
|
1702 |
while (m != 1) |
1703 |
{ |
1704 |
x += 1; |
1705 |
m >>= 1; |
1706 |
} |
1707 |
|
1708 |
return x + 8 * xd3_sizeof_output (output); |
1709 |
} |
1710 |
|
1711 |
static const char* xd3_sect_type (xd3_section_type type) |
1712 |
{ |
1713 |
switch (type) |
1714 |
{ |
1715 |
case DATA_SECTION: return "DATA"; |
1716 |
case INST_SECTION: return "INST"; |
1717 |
case ADDR_SECTION: return "ADDR"; |
1718 |
} |
1719 |
abort (); |
1720 |
} |
1721 |
|
1722 |
static int |
1723 |
xd3_encode_huff (xd3_stream *stream, |
1724 |
djw_stream *h, |
1725 |
xd3_output *input, |
1726 |
xd3_output *unused_output, |
1727 |
xd3_sec_cfg *cfg) |
1728 |
{ |
1729 |
int ret = 0; |
1730 |
int input_size = xd3_sizeof_output (input); |
1731 |
static int hdr = 0; |
1732 |
const char *sect_type = xd3_sect_type (cfg->data_type); |
1733 |
xd3_output *output; |
1734 |
usize_t output_size; |
1735 |
|
1736 |
if (hdr == 0) { hdr = 1; DP(RINT "____ SECT INSZ SECTORSZ GPNO OUTSZ PREFIX SELECT ENCODE\n"); } |
1737 |
|
1738 |
DP(RINT "SECTION %s %u\n", sect_type, input_size); |
1739 |
|
1740 |
{ |
1741 |
int gp, i; |
1742 |
int best_size = 99999999; |
1743 |
usize_t best_prefix = 0, best_select = 0, best_encode = 0, best_sector_size = 0; |
1744 |
int best_gpno = -1; |
1745 |
const char *t12 = "12"; |
1746 |
usize_t clen_count[DJW_MAX_CODELEN+1]; |
1747 |
djw_weight best_freq[DJW_TOTAL_CODES]; |
1748 |
|
1749 |
for (cfg->ngroups = 1; cfg->ngroups <= /*1*/ DJW_MAX_GROUPS; cfg->ngroups += 1) |
1750 |
{ |
1751 |
for (cfg->sector_size = 10; cfg->sector_size <= DJW_SECTORSZ_MAX; cfg->sector_size += 10) |
1752 |
{ |
1753 |
output = xd3_alloc_output (stream, NULL); |
1754 |
|
1755 |
if ((ret = xd3_real_encode_huff (stream, h, input, output, cfg))) { goto fail; } |
1756 |
|
1757 |
output_size = xd3_sizeof_output (output); |
1758 |
|
1759 |
if (output_size < best_size) |
1760 |
{ |
1761 |
best_size = output_size; |
1762 |
best_gpno = cfg->ngroups; |
1763 |
best_prefix = tune_prefix_bits; |
1764 |
best_select = tune_select_bits; |
1765 |
best_encode = tune_encode_bits; |
1766 |
best_sector_size = cfg->sector_size; |
1767 |
memset (clen_count, 0, sizeof (clen_count)); |
1768 |
|
1769 |
for (gp = 0; gp < cfg->ngroups; gp += 1) |
1770 |
{ |
1771 |
for (i = 0; i < ALPHABET_SIZE; i += 1) |
1772 |
{ |
1773 |
clen_count[tune_clen[gp][i]] += 1; |
1774 |
} |
1775 |
} |
1776 |
|
1777 |
memcpy (best_freq, tune_freq, sizeof (tune_freq)); |
1778 |
|
1779 |
XD3_ASSERT (sizeof (tune_freq) == sizeof (mtf_freq)); |
1780 |
} |
1781 |
|
1782 |
if (1) |
1783 |
{ |
1784 |
DP(RINT "COMP%s %u %u %u %u %u %u\n", |
1785 |
t12, cfg->ngroups, cfg->sector_size, |
1786 |
output_size, tune_prefix_bits, tune_select_bits, tune_encode_bits); |
1787 |
} |
1788 |
else |
1789 |
{ |
1790 |
fail: |
1791 |
DP(RINT "COMP%s %u %u %u %u %u %u\n", |
1792 |
t12, cfg->ngroups, cfg->sector_size, |
1793 |
input_size, 0, 0, 0); |
1794 |
} |
1795 |
|
1796 |
xd3_free_output (stream, output); |
1797 |
|
1798 |
XD3_ASSERT (ret == 0 || ret == XD3_NOSECOND); |
1799 |
|
1800 |
if (cfg->ngroups == 1) { break; } |
1801 |
} |
1802 |
} |
1803 |
|
1804 |
if (best_gpno > 0) |
1805 |
{ |
1806 |
DP(RINT "BEST%s %u %u %u %u %u %u\n", |
1807 |
t12, best_gpno, best_sector_size, |
1808 |
best_size, best_prefix, best_select, best_encode); |
1809 |
|
1810 |
#if 0 |
1811 |
DP(RINT "CLEN%s ", t12); |
1812 |
for (i = 1; i <= DJW_MAX_CODELEN; i += 1) |
1813 |
{ |
1814 |
DP(RINT "%u ", clen_count[i]); |
1815 |
} |
1816 |
DP(RINT "\n"); |
1817 |
|
1818 |
DP(RINT "FREQ%s ", t12); |
1819 |
for (i = 0; i < DJW_TOTAL_CODES; i += 1) |
1820 |
{ |
1821 |
DP(RINT "%u ", tune_freq[i]); |
1822 |
} |
1823 |
DP(RINT "\n"); |
1824 |
#endif |
1825 |
} |
1826 |
} |
1827 |
|
1828 |
/* Compare to split single-table windows. */ |
1829 |
{ |
1830 |
int parts, i; |
1831 |
|
1832 |
cfg->ngroups = 1; |
1833 |
|
1834 |
for (parts = 2; parts <= DJW_MAX_GROUPS; parts += 1) |
1835 |
{ |
1836 |
usize_t part_size = input_size / parts; |
1837 |
xd3_output *inp = input, *partin, *partin_head; |
1838 |
usize_t off = 0; |
1839 |
usize_t part_total = 0; |
1840 |
|
1841 |
if (part_size < 1000) { break; } |
1842 |
|
1843 |
for (i = 0; i < parts; i += 1) |
1844 |
{ |
1845 |
usize_t inc; |
1846 |
|
1847 |
partin = partin_head = xd3_alloc_output (stream, NULL); |
1848 |
output = xd3_alloc_output (stream, NULL); |
1849 |
|
1850 |
for (inc = 0; ((i < parts-1) && inc < part_size) || |
1851 |
((i == parts-1) && inp != NULL); ) |
1852 |
{ |
1853 |
usize_t take; |
1854 |
|
1855 |
if (i < parts-1) |
1856 |
{ |
1857 |
take = min (part_size - inc, inp->next - off); |
1858 |
} |
1859 |
else |
1860 |
{ |
1861 |
take = inp->next - off; |
1862 |
} |
1863 |
|
1864 |
ret = xd3_emit_bytes (stream, & partin, inp->base + off, take); |
1865 |
|
1866 |
off += take; |
1867 |
inc += take; |
1868 |
|
1869 |
if (off == inp->next) |
1870 |
{ |
1871 |
inp = inp->next_page; |
1872 |
off = 0; |
1873 |
} |
1874 |
} |
1875 |
|
1876 |
ret = xd3_real_encode_huff (stream, h, partin_head, output, cfg); |
1877 |
|
1878 |
part_total += xd3_sizeof_output (output); |
1879 |
|
1880 |
xd3_free_output (stream, partin_head); |
1881 |
xd3_free_output (stream, output); |
1882 |
|
1883 |
XD3_ASSERT (ret == 0 || ret == XD3_NOSECOND); |
1884 |
|
1885 |
if (ret == XD3_NOSECOND) |
1886 |
{ |
1887 |
break; |
1888 |
} |
1889 |
} |
1890 |
|
1891 |
if (ret != XD3_NOSECOND) |
1892 |
{ |
1893 |
DP(RINT "PART %u %u\n", parts, part_total); |
1894 |
} |
1895 |
} |
1896 |
} |
1897 |
|
1898 |
/* Compare to FGK */ |
1899 |
{ |
1900 |
fgk_stream *fgk = fgk_alloc (stream); |
1901 |
|
1902 |
fgk_init (fgk); |
1903 |
|
1904 |
output = xd3_alloc_output (stream, NULL); |
1905 |
|
1906 |
ret = xd3_encode_fgk (stream, fgk, input, output, NULL); |
1907 |
|
1908 |
output_size = xd3_sizeof_output (output); |
1909 |
xd3_free_output (stream, output); |
1910 |
fgk_destroy (stream, fgk); |
1911 |
|
1912 |
XD3_ASSERT (ret == 0); |
1913 |
|
1914 |
DP(RINT "FGK %u\n", output_size); |
1915 |
} |
1916 |
|
1917 |
DP(RINT "END_SECTION %s %u\n", sect_type, input_size); |
1918 |
|
1919 |
return 0; |
1920 |
} |
1921 |
#endif |
1922 |
|
1923 |
#endif |