/* gzread.c -- zlib functions for reading gzip files
 * Copyright (C) 2004, 2005, 2010, 2011, 2012, 2013 Mark Adler
 * For conditions of distribution and use, see copyright notice in zlib.h
 */
5 |
|
6 |
#include "gzguts.h" |
7 |
|
8 |
/* Local functions */ |
9 |
local int gz_load OF((gz_statep, unsigned char *, unsigned, unsigned *)); |
10 |
local int gz_avail OF((gz_statep)); |
11 |
local int gz_look OF((gz_statep)); |
12 |
local int gz_decomp OF((gz_statep)); |
13 |
local int gz_fetch OF((gz_statep)); |
14 |
local int gz_skip OF((gz_statep, z_off64_t)); |
15 |
|
16 |
/* Use read() to load a buffer -- return -1 on error, otherwise 0. Read from |
17 |
state->fd, and update state->eof, state->err, and state->msg as appropriate. |
18 |
This function needs to loop on read(), since read() is not guaranteed to |
19 |
read the number of bytes requested, depending on the type of descriptor. */ |
20 |
local int gz_load(state, buf, len, have) |
21 |
gz_statep state; |
22 |
unsigned char *buf; |
23 |
unsigned len; |
24 |
unsigned *have; |
25 |
{ |
26 |
int ret; |
27 |
|
28 |
*have = 0; |
29 |
do { |
30 |
ret = read(state->fd, buf + *have, len - *have); |
31 |
if (ret <= 0) |
32 |
break; |
33 |
*have += ret; |
34 |
} while (*have < len); |
35 |
if (ret < 0) { |
36 |
gz_error(state, Z_ERRNO, zstrerror()); |
37 |
return -1; |
38 |
} |
39 |
if (ret == 0) |
40 |
state->eof = 1; |
41 |
return 0; |
42 |
} |
43 |
|
44 |
/* Load up input buffer and set eof flag if last data loaded -- return -1 on |
45 |
error, 0 otherwise. Note that the eof flag is set when the end of the input |
46 |
file is reached, even though there may be unused data in the buffer. Once |
47 |
that data has been used, no more attempts will be made to read the file. |
48 |
If strm->avail_in != 0, then the current data is moved to the beginning of |
49 |
the input buffer, and then the remainder of the buffer is loaded with the |
50 |
available data from the input file. */ |
51 |
local int gz_avail(state) |
52 |
gz_statep state; |
53 |
{ |
54 |
unsigned got; |
55 |
z_streamp strm = &(state->strm); |
56 |
|
57 |
if (state->err != Z_OK && state->err != Z_BUF_ERROR) |
58 |
return -1; |
59 |
if (state->eof == 0) { |
60 |
if (strm->avail_in) { /* copy what's there to the start */ |
61 |
unsigned char *p = state->in; |
62 |
unsigned const char *q = strm->next_in; |
63 |
unsigned n = strm->avail_in; |
64 |
do { |
65 |
*p++ = *q++; |
66 |
} while (--n); |
67 |
} |
68 |
if (gz_load(state, state->in + strm->avail_in, |
69 |
state->size - strm->avail_in, &got) == -1) |
70 |
return -1; |
71 |
strm->avail_in += got; |
72 |
strm->next_in = state->in; |
73 |
} |
74 |
return 0; |
75 |
} |
76 |
|
77 |
/* Look for gzip header, set up for inflate or copy. state->x.have must be 0. |
78 |
If this is the first time in, allocate required memory. state->how will be |
79 |
left unchanged if there is no more input data available, will be set to COPY |
80 |
if there is no gzip header and direct copying will be performed, or it will |
81 |
be set to GZIP for decompression. If direct copying, then leftover input |
82 |
data from the input buffer will be copied to the output buffer. In that |
83 |
case, all further file reads will be directly to either the output buffer or |
84 |
a user buffer. If decompressing, the inflate state will be initialized. |
85 |
gz_look() will return 0 on success or -1 on failure. */ |
86 |
local int gz_look(state) |
87 |
gz_statep state; |
88 |
{ |
89 |
z_streamp strm = &(state->strm); |
90 |
|
91 |
/* allocate read buffers and inflate memory */ |
92 |
if (state->size == 0) { |
93 |
/* allocate buffers */ |
94 |
state->in = (unsigned char *)malloc(state->want); |
95 |
state->out = (unsigned char *)malloc(state->want << 1); |
96 |
if (state->in == NULL || state->out == NULL) { |
97 |
if (state->out != NULL) |
98 |
free(state->out); |
99 |
if (state->in != NULL) |
100 |
free(state->in); |
101 |
gz_error(state, Z_MEM_ERROR, "out of memory"); |
102 |
return -1; |
103 |
} |
104 |
state->size = state->want; |
105 |
|
106 |
/* allocate inflate memory */ |
107 |
state->strm.zalloc = Z_NULL; |
108 |
state->strm.zfree = Z_NULL; |
109 |
state->strm.opaque = Z_NULL; |
110 |
state->strm.avail_in = 0; |
111 |
state->strm.next_in = Z_NULL; |
112 |
if (inflateInit2(&(state->strm), 15 + 16) != Z_OK) { /* gunzip */ |
113 |
free(state->out); |
114 |
free(state->in); |
115 |
state->size = 0; |
116 |
gz_error(state, Z_MEM_ERROR, "out of memory"); |
117 |
return -1; |
118 |
} |
119 |
} |
120 |
|
121 |
/* get at least the magic bytes in the input buffer */ |
122 |
if (strm->avail_in < 2) { |
123 |
if (gz_avail(state) == -1) |
124 |
return -1; |
125 |
if (strm->avail_in == 0) |
126 |
return 0; |
127 |
} |
128 |
|
129 |
/* look for gzip magic bytes -- if there, do gzip decoding (note: there is |
130 |
a logical dilemma here when considering the case of a partially written |
131 |
gzip file, to wit, if a single 31 byte is written, then we cannot tell |
132 |
whether this is a single-byte file, or just a partially written gzip |
133 |
file -- for here we assume that if a gzip file is being written, then |
134 |
the header will be written in a single operation, so that reading a |
135 |
single byte is sufficient indication that it is not a gzip file) */ |
136 |
if (strm->avail_in > 1 && |
137 |
strm->next_in[0] == 31 && strm->next_in[1] == 139) { |
138 |
inflateReset(strm); |
139 |
state->how = GZIP; |
140 |
state->direct = 0; |
141 |
return 0; |
142 |
} |
143 |
|
144 |
/* no gzip header -- if we were decoding gzip before, then this is trailing |
145 |
garbage. Ignore the trailing garbage and finish. */ |
146 |
if (state->direct == 0) { |
147 |
strm->avail_in = 0; |
148 |
state->eof = 1; |
149 |
state->x.have = 0; |
150 |
return 0; |
151 |
} |
152 |
|
153 |
/* doing raw i/o, copy any leftover input to output -- this assumes that |
154 |
the output buffer is larger than the input buffer, which also assures |
155 |
space for gzungetc() */ |
156 |
state->x.next = state->out; |
157 |
if (strm->avail_in) { |
158 |
memcpy(state->x.next, strm->next_in, strm->avail_in); |
159 |
state->x.have = strm->avail_in; |
160 |
strm->avail_in = 0; |
161 |
} |
162 |
state->how = COPY; |
163 |
state->direct = 1; |
164 |
return 0; |
165 |
} |
166 |
|
167 |
/* Decompress from input to the provided next_out and avail_out in the state. |
168 |
On return, state->x.have and state->x.next point to the just decompressed |
169 |
data. If the gzip stream completes, state->how is reset to LOOK to look for |
170 |
the next gzip stream or raw data, once state->x.have is depleted. Returns 0 |
171 |
on success, -1 on failure. */ |
172 |
local int gz_decomp(state) |
173 |
gz_statep state; |
174 |
{ |
175 |
int ret = Z_OK; |
176 |
unsigned had; |
177 |
z_streamp strm = &(state->strm); |
178 |
|
179 |
/* fill output buffer up to end of deflate stream */ |
180 |
had = strm->avail_out; |
181 |
do { |
182 |
/* get more input for inflate() */ |
183 |
if (strm->avail_in == 0 && gz_avail(state) == -1) |
184 |
return -1; |
185 |
if (strm->avail_in == 0) { |
186 |
gz_error(state, Z_BUF_ERROR, "unexpected end of file"); |
187 |
break; |
188 |
} |
189 |
|
190 |
/* decompress and handle errors */ |
191 |
ret = inflate(strm, Z_NO_FLUSH); |
192 |
if (ret == Z_STREAM_ERROR || ret == Z_NEED_DICT) { |
193 |
gz_error(state, Z_STREAM_ERROR, |
194 |
"internal error: inflate stream corrupt"); |
195 |
return -1; |
196 |
} |
197 |
if (ret == Z_MEM_ERROR) { |
198 |
gz_error(state, Z_MEM_ERROR, "out of memory"); |
199 |
return -1; |
200 |
} |
201 |
if (ret == Z_DATA_ERROR) { /* deflate stream invalid */ |
202 |
gz_error(state, Z_DATA_ERROR, |
203 |
strm->msg == NULL ? "compressed data error" : strm->msg); |
204 |
return -1; |
205 |
} |
206 |
} while (strm->avail_out && ret != Z_STREAM_END); |
207 |
|
208 |
/* update available output */ |
209 |
state->x.have = had - strm->avail_out; |
210 |
state->x.next = strm->next_out - state->x.have; |
211 |
|
212 |
/* if the gzip stream completed successfully, look for another */ |
213 |
if (ret == Z_STREAM_END) |
214 |
state->how = LOOK; |
215 |
|
216 |
/* good decompression */ |
217 |
return 0; |
218 |
} |
219 |
|
220 |
/* Fetch data and put it in the output buffer. Assumes state->x.have is 0. |
221 |
Data is either copied from the input file or decompressed from the input |
222 |
file depending on state->how. If state->how is LOOK, then a gzip header is |
223 |
looked for to determine whether to copy or decompress. Returns -1 on error, |
224 |
otherwise 0. gz_fetch() will leave state->how as COPY or GZIP unless the |
225 |
end of the input file has been reached and all data has been processed. */ |
226 |
local int gz_fetch(state) |
227 |
gz_statep state; |
228 |
{ |
229 |
z_streamp strm = &(state->strm); |
230 |
|
231 |
do { |
232 |
switch(state->how) { |
233 |
case LOOK: /* -> LOOK, COPY (only if never GZIP), or GZIP */ |
234 |
if (gz_look(state) == -1) |
235 |
return -1; |
236 |
if (state->how == LOOK) |
237 |
return 0; |
238 |
break; |
239 |
case COPY: /* -> COPY */ |
240 |
if (gz_load(state, state->out, state->size << 1, &(state->x.have)) |
241 |
== -1) |
242 |
return -1; |
243 |
state->x.next = state->out; |
244 |
return 0; |
245 |
case GZIP: /* -> GZIP or LOOK (if end of gzip stream) */ |
246 |
strm->avail_out = state->size << 1; |
247 |
strm->next_out = state->out; |
248 |
if (gz_decomp(state) == -1) |
249 |
return -1; |
250 |
} |
251 |
} while (state->x.have == 0 && (!state->eof || strm->avail_in)); |
252 |
return 0; |
253 |
} |
254 |
|
255 |
/* Skip len uncompressed bytes of output. Return -1 on error, 0 on success. */ |
256 |
local int gz_skip(state, len) |
257 |
gz_statep state; |
258 |
z_off64_t len; |
259 |
{ |
260 |
unsigned n; |
261 |
|
262 |
/* skip over len bytes or reach end-of-file, whichever comes first */ |
263 |
while (len) |
264 |
/* skip over whatever is in output buffer */ |
265 |
if (state->x.have) { |
266 |
n = GT_OFF(state->x.have) || (z_off64_t)state->x.have > len ? |
267 |
(unsigned)len : state->x.have; |
268 |
state->x.have -= n; |
269 |
state->x.next += n; |
270 |
state->x.pos += n; |
271 |
len -= n; |
272 |
} |
273 |
|
274 |
/* output buffer empty -- return if we're at the end of the input */ |
275 |
else if (state->eof && state->strm.avail_in == 0) |
276 |
break; |
277 |
|
278 |
/* need more data to skip -- load up output buffer */ |
279 |
else { |
280 |
/* get more output, looking for header if required */ |
281 |
if (gz_fetch(state) == -1) |
282 |
return -1; |
283 |
} |
284 |
return 0; |
285 |
} |
286 |
|
287 |
/* -- see zlib.h -- */ |
288 |
int ZEXPORT gzread(file, buf, len) |
289 |
gzFile file; |
290 |
voidp buf; |
291 |
unsigned len; |
292 |
{ |
293 |
unsigned got, n; |
294 |
gz_statep state; |
295 |
z_streamp strm; |
296 |
|
297 |
/* get internal structure */ |
298 |
if (file == NULL) |
299 |
return -1; |
300 |
state = (gz_statep)file; |
301 |
strm = &(state->strm); |
302 |
|
303 |
/* check that we're reading and that there's no (serious) error */ |
304 |
if (state->mode != GZ_READ || |
305 |
(state->err != Z_OK && state->err != Z_BUF_ERROR)) |
306 |
return -1; |
307 |
|
308 |
/* since an int is returned, make sure len fits in one, otherwise return |
309 |
with an error (this avoids the flaw in the interface) */ |
310 |
if ((int)len < 0) { |
311 |
gz_error(state, Z_DATA_ERROR, "requested length does not fit in int"); |
312 |
return -1; |
313 |
} |
314 |
|
315 |
/* if len is zero, avoid unnecessary operations */ |
316 |
if (len == 0) |
317 |
return 0; |
318 |
|
319 |
/* process a skip request */ |
320 |
if (state->seek) { |
321 |
state->seek = 0; |
322 |
if (gz_skip(state, state->skip) == -1) |
323 |
return -1; |
324 |
} |
325 |
|
326 |
/* get len bytes to buf, or less than len if at the end */ |
327 |
got = 0; |
328 |
do { |
329 |
/* first just try copying data from the output buffer */ |
330 |
if (state->x.have) { |
331 |
n = state->x.have > len ? len : state->x.have; |
332 |
memcpy(buf, state->x.next, n); |
333 |
state->x.next += n; |
334 |
state->x.have -= n; |
335 |
} |
336 |
|
337 |
/* output buffer empty -- return if we're at the end of the input */ |
338 |
else if (state->eof && strm->avail_in == 0) { |
339 |
state->past = 1; /* tried to read past end */ |
340 |
break; |
341 |
} |
342 |
|
343 |
/* need output data -- for small len or new stream load up our output |
344 |
buffer */ |
345 |
else if (state->how == LOOK || len < (state->size << 1)) { |
346 |
/* get more output, looking for header if required */ |
347 |
if (gz_fetch(state) == -1) |
348 |
return -1; |
349 |
continue; /* no progress yet -- go back to copy above */ |
350 |
/* the copy above assures that we will leave with space in the |
351 |
output buffer, allowing at least one gzungetc() to succeed */ |
352 |
} |
353 |
|
354 |
/* large len -- read directly into user buffer */ |
355 |
else if (state->how == COPY) { /* read directly */ |
356 |
if (gz_load(state, (unsigned char *)buf, len, &n) == -1) |
357 |
return -1; |
358 |
} |
359 |
|
360 |
/* large len -- decompress directly into user buffer */ |
361 |
else { /* state->how == GZIP */ |
362 |
strm->avail_out = len; |
363 |
strm->next_out = (unsigned char *)buf; |
364 |
if (gz_decomp(state) == -1) |
365 |
return -1; |
366 |
n = state->x.have; |
367 |
state->x.have = 0; |
368 |
} |
369 |
|
370 |
/* update progress */ |
371 |
len -= n; |
372 |
buf = (char *)buf + n; |
373 |
got += n; |
374 |
state->x.pos += n; |
375 |
} while (len); |
376 |
|
377 |
/* return number of bytes read into user buffer (will fit in int) */ |
378 |
return (int)got; |
379 |
} |
380 |
|
381 |
/* -- see zlib.h -- */ |
382 |
#ifdef Z_PREFIX_SET |
383 |
# undef z_gzgetc |
384 |
#else |
385 |
# undef gzgetc |
386 |
#endif |
387 |
int ZEXPORT gzgetc(file) |
388 |
gzFile file; |
389 |
{ |
390 |
int ret; |
391 |
unsigned char buf[1]; |
392 |
gz_statep state; |
393 |
|
394 |
/* get internal structure */ |
395 |
if (file == NULL) |
396 |
return -1; |
397 |
state = (gz_statep)file; |
398 |
|
399 |
/* check that we're reading and that there's no (serious) error */ |
400 |
if (state->mode != GZ_READ || |
401 |
(state->err != Z_OK && state->err != Z_BUF_ERROR)) |
402 |
return -1; |
403 |
|
404 |
/* try output buffer (no need to check for skip request) */ |
405 |
if (state->x.have) { |
406 |
state->x.have--; |
407 |
state->x.pos++; |
408 |
return *(state->x.next)++; |
409 |
} |
410 |
|
411 |
/* nothing there -- try gzread() */ |
412 |
ret = gzread(file, buf, 1); |
413 |
return ret < 1 ? -1 : buf[0]; |
414 |
} |
415 |
|
416 |
int ZEXPORT gzgetc_(file) |
417 |
gzFile file; |
418 |
{ |
419 |
return gzgetc(file); |
420 |
} |
421 |
|
422 |
/* -- see zlib.h -- */ |
423 |
int ZEXPORT gzungetc(c, file) |
424 |
int c; |
425 |
gzFile file; |
426 |
{ |
427 |
gz_statep state; |
428 |
|
429 |
/* get internal structure */ |
430 |
if (file == NULL) |
431 |
return -1; |
432 |
state = (gz_statep)file; |
433 |
|
434 |
/* check that we're reading and that there's no (serious) error */ |
435 |
if (state->mode != GZ_READ || |
436 |
(state->err != Z_OK && state->err != Z_BUF_ERROR)) |
437 |
return -1; |
438 |
|
439 |
/* process a skip request */ |
440 |
if (state->seek) { |
441 |
state->seek = 0; |
442 |
if (gz_skip(state, state->skip) == -1) |
443 |
return -1; |
444 |
} |
445 |
|
446 |
/* can't push EOF */ |
447 |
if (c < 0) |
448 |
return -1; |
449 |
|
450 |
/* if output buffer empty, put byte at end (allows more pushing) */ |
451 |
if (state->x.have == 0) { |
452 |
state->x.have = 1; |
453 |
state->x.next = state->out + (state->size << 1) - 1; |
454 |
state->x.next[0] = c; |
455 |
state->x.pos--; |
456 |
state->past = 0; |
457 |
return c; |
458 |
} |
459 |
|
460 |
/* if no room, give up (must have already done a gzungetc()) */ |
461 |
if (state->x.have == (state->size << 1)) { |
462 |
gz_error(state, Z_DATA_ERROR, "out of room to push characters"); |
463 |
return -1; |
464 |
} |
465 |
|
466 |
/* slide output data if needed and insert byte before existing data */ |
467 |
if (state->x.next == state->out) { |
468 |
unsigned char *src = state->out + state->x.have; |
469 |
unsigned char *dest = state->out + (state->size << 1); |
470 |
while (src > state->out) |
471 |
*--dest = *--src; |
472 |
state->x.next = dest; |
473 |
} |
474 |
state->x.have++; |
475 |
state->x.next--; |
476 |
state->x.next[0] = c; |
477 |
state->x.pos--; |
478 |
state->past = 0; |
479 |
return c; |
480 |
} |
481 |
|
482 |
/* -- see zlib.h -- */ |
483 |
char * ZEXPORT gzgets(file, buf, len) |
484 |
gzFile file; |
485 |
char *buf; |
486 |
int len; |
487 |
{ |
488 |
unsigned left, n; |
489 |
char *str; |
490 |
unsigned char *eol; |
491 |
gz_statep state; |
492 |
|
493 |
/* check parameters and get internal structure */ |
494 |
if (file == NULL || buf == NULL || len < 1) |
495 |
return NULL; |
496 |
state = (gz_statep)file; |
497 |
|
498 |
/* check that we're reading and that there's no (serious) error */ |
499 |
if (state->mode != GZ_READ || |
500 |
(state->err != Z_OK && state->err != Z_BUF_ERROR)) |
501 |
return NULL; |
502 |
|
503 |
/* process a skip request */ |
504 |
if (state->seek) { |
505 |
state->seek = 0; |
506 |
if (gz_skip(state, state->skip) == -1) |
507 |
return NULL; |
508 |
} |
509 |
|
510 |
/* copy output bytes up to new line or len - 1, whichever comes first -- |
511 |
append a terminating zero to the string (we don't check for a zero in |
512 |
the contents, let the user worry about that) */ |
513 |
str = buf; |
514 |
left = (unsigned)len - 1; |
515 |
if (left) do { |
516 |
/* assure that something is in the output buffer */ |
517 |
if (state->x.have == 0 && gz_fetch(state) == -1) |
518 |
return NULL; /* error */ |
519 |
if (state->x.have == 0) { /* end of file */ |
520 |
state->past = 1; /* read past end */ |
521 |
break; /* return what we have */ |
522 |
} |
523 |
|
524 |
/* look for end-of-line in current output buffer */ |
525 |
n = state->x.have > left ? left : state->x.have; |
526 |
eol = (unsigned char *)memchr(state->x.next, '\n', n); |
527 |
if (eol != NULL) |
528 |
n = (unsigned)(eol - state->x.next) + 1; |
529 |
|
530 |
/* copy through end-of-line, or remainder if not found */ |
531 |
memcpy(buf, state->x.next, n); |
532 |
state->x.have -= n; |
533 |
state->x.next += n; |
534 |
state->x.pos += n; |
535 |
left -= n; |
536 |
buf += n; |
537 |
} while (left && eol == NULL); |
538 |
|
539 |
/* return terminated string, or if nothing, end of file */ |
540 |
if (buf == str) |
541 |
return NULL; |
542 |
buf[0] = 0; |
543 |
return str; |
544 |
} |
545 |
|
546 |
/* -- see zlib.h -- */ |
547 |
int ZEXPORT gzdirect(file) |
548 |
gzFile file; |
549 |
{ |
550 |
gz_statep state; |
551 |
|
552 |
/* get internal structure */ |
553 |
if (file == NULL) |
554 |
return 0; |
555 |
state = (gz_statep)file; |
556 |
|
557 |
/* if the state is not known, but we can find out, then do so (this is |
558 |
mainly for right after a gzopen() or gzdopen()) */ |
559 |
if (state->mode == GZ_READ && state->how == LOOK && state->x.have == 0) |
560 |
(void)gz_look(state); |
561 |
|
562 |
/* return 1 if transparent, 0 if processing a gzip stream */ |
563 |
return state->direct; |
564 |
} |
565 |
|
566 |
/* -- see zlib.h -- */ |
567 |
int ZEXPORT gzclose_r(file) |
568 |
gzFile file; |
569 |
{ |
570 |
int ret, err; |
571 |
gz_statep state; |
572 |
|
573 |
/* get internal structure */ |
574 |
if (file == NULL) |
575 |
return Z_STREAM_ERROR; |
576 |
state = (gz_statep)file; |
577 |
|
578 |
/* check that we're reading */ |
579 |
if (state->mode != GZ_READ) |
580 |
return Z_STREAM_ERROR; |
581 |
|
582 |
/* free memory and close file */ |
583 |
if (state->size) { |
584 |
inflateEnd(&(state->strm)); |
585 |
free(state->out); |
586 |
free(state->in); |
587 |
} |
588 |
err = state->err == Z_BUF_ERROR ? Z_BUF_ERROR : Z_OK; |
589 |
gz_error(state, Z_OK, NULL); |
590 |
free(state->path); |
591 |
ret = close(state->fd); |
592 |
free(state); |
593 |
return ret ? Z_ERRNO : err; |
594 |
} |