| 1 |
/* fitblk.c: example of fitting compressed output to a specified size |
| 2 |
Not copyrighted -- provided to the public domain |
| 3 |
Version 1.1 25 November 2004 Mark Adler */ |
| 4 |
|
| 5 |
/* Version history: |
| 6 |
1.0 24 Nov 2004 First version |
| 7 |
1.1 25 Nov 2004 Change deflateInit2() to deflateInit() |
| 8 |
Use fixed-size, stack-allocated raw buffers |
| 9 |
Simplify code moving compression to subroutines |
| 10 |
Use assert() for internal errors |
| 11 |
Add detailed description of approach |
| 12 |
*/ |
| 13 |
|
| 14 |
/* Approach to just fitting a requested compressed size: |
| 15 |
|
| 16 |
fitblk performs three compression passes on a portion of the input |
| 17 |
data in order to determine how much of that input will compress to |
| 18 |
nearly the requested output block size. The first pass generates |
| 19 |
enough deflate blocks to produce output to fill the requested |
| 20 |
output size plus a specified excess amount (see the EXCESS define |
| 21 |
below). The last deflate block may go quite a bit past that, but |
| 22 |
is discarded. The second pass decompresses and recompresses just |
| 23 |
the compressed data that fit in the requested plus excess sized |
| 24 |
buffer. The deflate process is terminated after that amount of |
| 25 |
input, which is less than the amount consumed on the first pass. |
| 26 |
The last deflate block of the result will be of a comparable size |
| 27 |
to the final product, so that the header for that deflate block and |
| 28 |
the compression ratio for that block will be about the same as in |
| 29 |
the final product. The third compression pass decompresses the |
| 30 |
result of the second step, but only the compressed data up to the |
| 31 |
requested size minus an amount to allow the compressed stream to |
| 32 |
complete (see the MARGIN define below). That will result in a |
| 33 |
final compressed stream whose length is less than or equal to the |
| 34 |
requested size. Assuming sufficient input and a requested size |
| 35 |
greater than a few hundred bytes, the shortfall will typically be |
| 36 |
less than ten bytes. |
| 37 |
|
| 38 |
If the input is short enough that the first compression completes |
| 39 |
before filling the requested output size, then that compressed |
| 40 |
stream is returned with no recompression. |
| 41 |
|
| 42 |
EXCESS is chosen to be just greater than the shortfall seen in a |
| 43 |
two pass approach similar to the above. That shortfall is due to |
| 44 |
the last deflate block compressing more efficiently with a smaller |
| 45 |
header on the second pass. EXCESS is set to be large enough so |
| 46 |
that there is enough uncompressed data for the second pass to fill |
| 47 |
out the requested size, and small enough so that the final deflate |
| 48 |
block of the second pass will be close in size to the final deflate |
| 49 |
block of the third and final pass. MARGIN is chosen to be just |
| 50 |
large enough to assure that the final compression has enough room |
| 51 |
to complete in all cases. |
| 52 |
*/ |
| 53 |
|
| 54 |
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include <errno.h>
#include <limits.h>
#include "zlib.h"
| 58 |
|
| 59 |
#define local static |
| 60 |
|
| 61 |
/* print nastygram and leave */ |
| 62 |
local void quit(char *why) |
| 63 |
{ |
| 64 |
fprintf(stderr, "fitblk abort: %s\n", why); |
| 65 |
exit(1); |
| 66 |
} |
| 67 |
|
| 68 |
#define RAWLEN 4096 /* intermediate uncompressed buffer size */ |
| 69 |
|
| 70 |
/* compress from file to def until provided buffer is full or end of |
| 71 |
input reached; return last deflate() return value, or Z_ERRNO if |
| 72 |
there was read error on the file */ |
| 73 |
local int partcompress(FILE *in, z_streamp def) |
| 74 |
{ |
| 75 |
int ret, flush; |
| 76 |
unsigned char raw[RAWLEN]; |
| 77 |
|
| 78 |
flush = Z_NO_FLUSH; |
| 79 |
do { |
| 80 |
def->avail_in = fread(raw, 1, RAWLEN, in); |
| 81 |
if (ferror(in)) |
| 82 |
return Z_ERRNO; |
| 83 |
def->next_in = raw; |
| 84 |
if (feof(in)) |
| 85 |
flush = Z_FINISH; |
| 86 |
ret = deflate(def, flush); |
| 87 |
assert(ret != Z_STREAM_ERROR); |
| 88 |
} while (def->avail_out != 0 && flush == Z_NO_FLUSH); |
| 89 |
return ret; |
| 90 |
} |
| 91 |
|
| 92 |
/* recompress from inf's input to def's output; the input for inf and |
| 93 |
the output for def are set in those structures before calling; |
| 94 |
return last deflate() return value, or Z_MEM_ERROR if inflate() |
| 95 |
was not able to allocate enough memory when it needed to */ |
| 96 |
local int recompress(z_streamp inf, z_streamp def) |
| 97 |
{ |
| 98 |
int ret, flush; |
| 99 |
unsigned char raw[RAWLEN]; |
| 100 |
|
| 101 |
flush = Z_NO_FLUSH; |
| 102 |
do { |
| 103 |
/* decompress */ |
| 104 |
inf->avail_out = RAWLEN; |
| 105 |
inf->next_out = raw; |
| 106 |
ret = inflate(inf, Z_NO_FLUSH); |
| 107 |
assert(ret != Z_STREAM_ERROR && ret != Z_DATA_ERROR && |
| 108 |
ret != Z_NEED_DICT); |
| 109 |
if (ret == Z_MEM_ERROR) |
| 110 |
return ret; |
| 111 |
|
| 112 |
/* compress what was decompresed until done or no room */ |
| 113 |
def->avail_in = RAWLEN - inf->avail_out; |
| 114 |
def->next_in = raw; |
| 115 |
if (inf->avail_out != 0) |
| 116 |
flush = Z_FINISH; |
| 117 |
ret = deflate(def, flush); |
| 118 |
assert(ret != Z_STREAM_ERROR); |
| 119 |
} while (ret != Z_STREAM_END && def->avail_out != 0); |
| 120 |
return ret; |
| 121 |
} |
| 122 |
|
| 123 |
#define EXCESS 256 /* empirically determined stream overage */ |
| 124 |
#define MARGIN 8 /* amount to back off for completion */ |
| 125 |
|
| 126 |
/* compress from stdin to fixed-size block on stdout */ |
| 127 |
int main(int argc, char **argv) |
| 128 |
{ |
| 129 |
int ret; /* return code */ |
| 130 |
unsigned size; /* requested fixed output block size */ |
| 131 |
unsigned have; /* bytes written by deflate() call */ |
| 132 |
unsigned char *blk; /* intermediate and final stream */ |
| 133 |
unsigned char *tmp; /* close to desired size stream */ |
| 134 |
z_stream def, inf; /* zlib deflate and inflate states */ |
| 135 |
|
| 136 |
/* get requested output size */ |
| 137 |
if (argc != 2) |
| 138 |
quit("need one argument: size of output block"); |
| 139 |
ret = strtol(argv[1], argv + 1, 10); |
| 140 |
if (argv[1][0] != 0) |
| 141 |
quit("argument must be a number"); |
| 142 |
if (ret < 8) /* 8 is minimum zlib stream size */ |
| 143 |
quit("need positive size of 8 or greater"); |
| 144 |
size = (unsigned)ret; |
| 145 |
|
| 146 |
/* allocate memory for buffers and compression engine */ |
| 147 |
blk = malloc(size + EXCESS); |
| 148 |
def.zalloc = Z_NULL; |
| 149 |
def.zfree = Z_NULL; |
| 150 |
def.opaque = Z_NULL; |
| 151 |
ret = deflateInit(&def, Z_DEFAULT_COMPRESSION); |
| 152 |
if (ret != Z_OK || blk == NULL) |
| 153 |
quit("out of memory"); |
| 154 |
|
| 155 |
/* compress from stdin until output full, or no more input */ |
| 156 |
def.avail_out = size + EXCESS; |
| 157 |
def.next_out = blk; |
| 158 |
ret = partcompress(stdin, &def); |
| 159 |
if (ret == Z_ERRNO) |
| 160 |
quit("error reading input"); |
| 161 |
|
| 162 |
/* if it all fit, then size was undersubscribed -- done! */ |
| 163 |
if (ret == Z_STREAM_END && def.avail_out >= EXCESS) { |
| 164 |
/* write block to stdout */ |
| 165 |
have = size + EXCESS - def.avail_out; |
| 166 |
if (fwrite(blk, 1, have, stdout) != have || ferror(stdout)) |
| 167 |
quit("error writing output"); |
| 168 |
|
| 169 |
/* clean up and print results to stderr */ |
| 170 |
ret = deflateEnd(&def); |
| 171 |
assert(ret != Z_STREAM_ERROR); |
| 172 |
free(blk); |
| 173 |
fprintf(stderr, |
| 174 |
"%u bytes unused out of %u requested (all input)\n", |
| 175 |
size - have, size); |
| 176 |
return 0; |
| 177 |
} |
| 178 |
|
| 179 |
/* it didn't all fit -- set up for recompression */ |
| 180 |
inf.zalloc = Z_NULL; |
| 181 |
inf.zfree = Z_NULL; |
| 182 |
inf.opaque = Z_NULL; |
| 183 |
inf.avail_in = 0; |
| 184 |
inf.next_in = Z_NULL; |
| 185 |
ret = inflateInit(&inf); |
| 186 |
tmp = malloc(size + EXCESS); |
| 187 |
if (ret != Z_OK || tmp == NULL) |
| 188 |
quit("out of memory"); |
| 189 |
ret = deflateReset(&def); |
| 190 |
assert(ret != Z_STREAM_ERROR); |
| 191 |
|
| 192 |
/* do first recompression close to the right amount */ |
| 193 |
inf.avail_in = size + EXCESS; |
| 194 |
inf.next_in = blk; |
| 195 |
def.avail_out = size + EXCESS; |
| 196 |
def.next_out = tmp; |
| 197 |
ret = recompress(&inf, &def); |
| 198 |
if (ret == Z_MEM_ERROR) |
| 199 |
quit("out of memory"); |
| 200 |
|
| 201 |
/* set up for next reocmpression */ |
| 202 |
ret = inflateReset(&inf); |
| 203 |
assert(ret != Z_STREAM_ERROR); |
| 204 |
ret = deflateReset(&def); |
| 205 |
assert(ret != Z_STREAM_ERROR); |
| 206 |
|
| 207 |
/* do second and final recompression (third compression) */ |
| 208 |
inf.avail_in = size - MARGIN; /* assure stream will complete */ |
| 209 |
inf.next_in = tmp; |
| 210 |
def.avail_out = size; |
| 211 |
def.next_out = blk; |
| 212 |
ret = recompress(&inf, &def); |
| 213 |
if (ret == Z_MEM_ERROR) |
| 214 |
quit("out of memory"); |
| 215 |
assert(ret == Z_STREAM_END); /* otherwise MARGIN too small */ |
| 216 |
|
| 217 |
/* done -- write block to stdout */ |
| 218 |
have = size - def.avail_out; |
| 219 |
if (fwrite(blk, 1, have, stdout) != have || ferror(stdout)) |
| 220 |
quit("error writing output"); |
| 221 |
|
| 222 |
/* clean up and print results to stderr */ |
| 223 |
free(tmp); |
| 224 |
ret = inflateEnd(&inf); |
| 225 |
assert(ret != Z_STREAM_ERROR); |
| 226 |
ret = deflateEnd(&def); |
| 227 |
assert(ret != Z_STREAM_ERROR); |
| 228 |
free(blk); |
| 229 |
fprintf(stderr, |
| 230 |
"%u bytes unused out of %u requested (%lu input)\n", |
| 231 |
size - have, size, def.total_in); |
| 232 |
return 0; |
| 233 |
} |