| 1 |
;/* inffas32.asm is a hand tuned assembler version of inffast.c -- fast decoding |
| 2 |
; * |
| 3 |
; * inffas32.asm is derived from inffas86.c, with translation of assembly code |
| 4 |
; * |
| 5 |
; * Copyright (C) 1995-2003 Mark Adler |
| 6 |
; * For conditions of distribution and use, see copyright notice in zlib.h |
| 7 |
; * |
| 8 |
; * Copyright (C) 2003 Chris Anderson <christop@charm.net> |
| 9 |
; * Please use the copyright conditions above. |
| 10 |
; * |
| 11 |
; * Mar-13-2003 -- Most of this is derived from inffast.S which is derived from |
| 12 |
; * the gcc -S output of zlib-1.2.0/inffast.c. Zlib-1.2.0 is in beta release at |
| 13 |
; * the moment. I have successfully compiled and tested this code with gcc2.96, |
| 14 |
; * gcc3.2, icc5.0, msvc6.0. It is very close to the speed of inffast.S |
| 15 |
; * compiled with gcc -DNO_MMX, but inffast.S is still faster on the P3 with MMX |
| 16 |
; * enabled. I will attempt to merge the MMX code into this version. Newer |
| 17 |
; * versions of this and inffast.S can be found at |
| 18 |
; * http://www.eetbeetee.com/zlib/ and http://www.charm.net/~christop/zlib/ |
| 19 |
; * |
| 20 |
; * 2005 : modification by Gilles Vollant |
| 21 |
; */ |
| 22 |
; For Visual C++ 4.x and higher and ML 6.x and higher |
| 23 |
; ml.exe is in directory \MASM611C of Win95 DDK |
| 24 |
; ml.exe is also distributed in http://www.masm32.com/masmdl.htm |
| 25 |
; and in VC++2003 toolkit at http://msdn.microsoft.com/visualc/vctoolkit2003/ |
| 26 |
; |
| 27 |
; |
| 28 |
; compile with command line option |
| 29 |
; ml /coff /Zi /c /Flinffas32.lst inffas32.asm |
| 30 |
|
| 31 |
; if you define NO_GUNZIP (see inflate.h), compile with |
| 32 |
; ml /coff /Zi /c /Flinffas32.lst /DNO_GUNZIP inffas32.asm |
| 33 |
|
| 34 |
|
| 35 |
; zlib1222sup is 0 for zlib 1.2.2.1 and lower |
| 36 |
; zlib1222sup is 8 for zlib 1.2.2.2 and later (with the addition of dmax and head |
| 37 |
; in inflate_state in inflate.h) |
| 38 |
; Extra bytes added to every inflate_state field offset used by this file.
; The value 8 selects the newer structure layout described in the comment above.
zlib1222sup             equ     8

; Numeric inflate mode values written to the mode field of the state on exit.
; Only the combination "NO_GUNZIP defined and GUNZIP not defined" selects the
; small numbering (3 / 17); every other combination selects 11 / 26.
IFDEF NO_GUNZIP
  IFDEF GUNZIP
INFLATE_MODE_TYPE       equ     11
INFLATE_MODE_BAD        equ     26
  ELSE
INFLATE_MODE_TYPE       equ     3
INFLATE_MODE_BAD        equ     17
  ENDIF
ELSE
INFLATE_MODE_TYPE       equ     11
INFLATE_MODE_BAD        equ     26
ENDIF
| 53 |
|
| 54 |
|
| 55 |
; 75 "inffast.S" |
| 56 |
;FILE "inffast.S" |
| 57 |
|
| 58 |
;;;GLOBAL _inflate_fast |
| 59 |
|
| 60 |
;;;SECTION .text |
| 61 |
|
| 62 |
|
| 63 |
|
| 64 |
; Instruction-set selection for the assembler: Pentium-class (.586p) plus MMX (.mmx).
; The procedure below uses cpuid and MMX register instructions.
.586p |
| 65 |
.mmx |
| 66 |
|
| 67 |
name inflate_fast_x86 |
| 68 |
.MODEL FLAT |
| 69 |
|
| 70 |
; Run-time selector read and written by _inflate_fast:
;   2       = use the MMX decode loop
;   above 2 = use the integer decode loop (the probe stores 3)
;   below 2 = CPU not probed yet (initial value 1)
; NOTE(review): no "_DATA ends" is visible before "_TEXT segment" opens further
; down -- confirm that the nested segment is intended.
_DATA segment |
| 71 |
inflate_fast_use_mmx: |
| 72 |
dd 1 |
| 73 |
|
| 74 |
|
| 75 |
_TEXT segment

; Constant data kept in the code segment: a credit string, three NUL-terminated
; error messages, and the bit-mask table used by the MMX decode path.
; Every item starts on a 4-byte boundary.

ALIGN 4
        db      'Fast decoding Code from Chris Anderson', 0

ALIGN 4
invalid_literal_length_code_msg:
        db      'invalid literal/length code', 0

ALIGN 4
invalid_distance_code_msg:
        db      'invalid distance code', 0

ALIGN 4
invalid_distance_too_far_msg:
        db      'invalid distance too far back', 0


; inflate_fast_mask[n] = (2 to the power n) - 1 for n = 0..32, one dword each.
; The code indexes it as [inflate_fast_mask + n*4] to keep the low n bits.
ALIGN 4
inflate_fast_mask:
        dd      000000000h, 000000001h, 000000003h, 000000007h   ; n =  0.. 3
        dd      00000000Fh, 00000001Fh, 00000003Fh, 00000007Fh   ; n =  4.. 7
        dd      0000000FFh, 0000001FFh, 0000003FFh, 0000007FFh   ; n =  8..11
        dd      000000FFFh, 000001FFFh, 000003FFFh, 000007FFFh   ; n = 12..15
        dd      00000FFFFh, 00001FFFFh, 00003FFFFh, 00007FFFFh   ; n = 16..19
        dd      0000FFFFFh, 0001FFFFFh, 0003FFFFFh, 0007FFFFFh   ; n = 20..23
        dd      000FFFFFFh, 001FFFFFFh, 003FFFFFFh, 007FFFFFFh   ; n = 24..27
        dd      00FFFFFFFh, 01FFFFFFFh, 03FFFFFFFh, 07FFFFFFFh   ; n = 28..31
        dd      0FFFFFFFFh                                       ; n = 32
| 134 |
|
| 135 |
|
| 136 |
; Byte offsets of the inflate_state fields this file touches.
; Every field except mode is shifted by zlib1222sup; the fixed parts below are
; the original sums folded into one number (for example 36+4 became 40).
mode_state      equ     0                       ; mode field
wsize_state     equ     zlib1222sup + 32        ; wsize field
write_state     equ     zlib1222sup + 40        ; write field
window_state    equ     zlib1222sup + 44        ; window field
hold_state      equ     zlib1222sup + 48        ; hold field
bits_state      equ     zlib1222sup + 52        ; bits field
lencode_state   equ     zlib1222sup + 68        ; lencode field
distcode_state  equ     zlib1222sup + 72        ; distcode field
lenbits_state   equ     zlib1222sup + 76        ; lenbits field
distbits_state  equ     zlib1222sup + 80        ; distbits field
| 146 |
|
| 147 |
|
| 148 |
;;SECTION .text |
| 149 |
; 205 "inffast.S" |
| 150 |
;GLOBAL inflate_fast_use_mmx |
| 151 |
|
| 152 |
;SECTION .data |
| 153 |
|
| 154 |
|
| 155 |
; GLOBAL inflate_fast_use_mmx:object |
| 156 |
;.size inflate_fast_use_mmx, 4 |
| 157 |
; 226 "inffast.S" |
| 158 |
;SECTION .text |
| 159 |
|
| 160 |
ALIGN 4 |
| 161 |
; _inflate_fast: hand-written x86 decode loop (see the file header).
; Two stack arguments are read after the prologue:
;   [esp+88] = first argument, a stream pointer ("strm" below)
;   [esp+92] = second argument
; NOTE(review): presumably strm/start of inflate_fast in inffast.c, and the
; strm offsets 0/4/12/16/24/28 presumably next_in/avail_in/next_out/avail_out/
; msg/state of z_stream -- confirm against zlib.h.
; The routine ends with a plain "ret", so the caller removes the arguments.
;
; 64-byte local frame (offsets from esp after "sub esp,64"):
;   +0   length-table index mask:   (2 to the power lenbits) - 1
;   +4   distance-table index mask: (2 to the power distbits) - 1
;   +8   lencode table pointer      +12  distcode table pointer
;   +16  output limit = [strm+12] + [strm+16] - 257
;   +20  input limit  = [strm+0]  + [strm+4]  - 11
;   +24  current match length
;   +28  12-byte scratch input buffer (+28..+39)
;   +40  output base  = [strm+12] - (second argument - [strm+16])
;   +44  saved input pointer
;   +48  write field  +52 wsize field  +56 window field of the state
;   +60  initial output pointer [strm+12]
_inflate_fast proc near |
| 162 |
.FPO (16, 4, 0, 0, 1, 0) |
| 163 |
; Prologue: save the registers and flags this routine changes, reserve the frame,
; and clear the direction flag so string instructions move upward.
push edi |
| 164 |
push esi |
| 165 |
push ebp |
| 166 |
push ebx |
| 167 |
pushfd |
| 168 |
sub esp,64 |
| 169 |
cld |
| 170 |
|
| 171 |
|
| 172 |
|
| 173 |
|
| 174 |
; esi = strm, edi = state pointer taken from [strm+28]
mov esi, [esp+88] |
| 175 |
mov edi, [esi+28] |
| 176 |
|
| 177 |
|
| 178 |
|
| 179 |
|
| 180 |
|
| 181 |
|
| 182 |
|
| 183 |
; Input limit: [strm+0] + [strm+4] - 11; also remember the input start.
mov edx, [esi+4] |
| 184 |
mov eax, [esi+0] |
| 185 |
|
| 186 |
add edx,eax |
| 187 |
sub edx,11 |
| 188 |
|
| 189 |
mov [esp+44],eax |
| 190 |
mov [esp+20],edx |
| 191 |
|
| 192 |
; ebp = second argument, ecx = [strm+16], ebx = [strm+12]
mov ebp, [esp+92] |
| 193 |
mov ecx, [esi+16] |
| 194 |
mov ebx, [esi+12] |
| 195 |
|
| 196 |
; Output base: ebp = ebx - (second argument - ecx)
sub ebp,ecx |
| 197 |
neg ebp |
| 198 |
add ebp,ebx |
| 199 |
|
| 200 |
; Output limit: ecx = ebx + ecx - 257
sub ecx,257 |
| 201 |
add ecx,ebx |
| 202 |
|
| 203 |
mov [esp+60],ebx |
| 204 |
mov [esp+40],ebp |
| 205 |
mov [esp+16],ecx |
| 206 |
; 285 "inffast.S" |
| 207 |
; Cache the two code-table pointers from the state.
mov eax, [edi+lencode_state] |
| 208 |
mov ecx, [edi+distcode_state] |
| 209 |
|
| 210 |
mov [esp+8],eax |
| 211 |
mov [esp+12],ecx |
| 212 |
|
| 213 |
; Length-table index mask: (1 shifted left by lenbits) - 1
mov eax,1 |
| 214 |
mov ecx, [edi+lenbits_state] |
| 215 |
shl eax,cl |
| 216 |
dec eax |
| 217 |
mov [esp+0],eax |
| 218 |
|
| 219 |
; Distance-table index mask: (1 shifted left by distbits) - 1
mov eax,1 |
| 220 |
mov ecx, [edi+distbits_state] |
| 221 |
shl eax,cl |
| 222 |
dec eax |
| 223 |
mov [esp+4],eax |
| 224 |
|
| 225 |
; Cache the window description from the state.
mov eax, [edi+wsize_state] |
| 226 |
mov ecx, [edi+write_state] |
| 227 |
mov edx, [edi+window_state] |
| 228 |
|
| 229 |
mov [esp+52],eax |
| 230 |
mov [esp+48],ecx |
| 231 |
mov [esp+56],edx |
| 232 |
|
| 233 |
; ebp = bit accumulator (hold field), ebx = number of valid bits (bits field)
mov ebp, [edi+hold_state] |
| 234 |
mov ebx, [edi+bits_state] |
| 235 |
; 321 "inffast.S" |
| 236 |
; If the input limit lies above the input pointer, read straight from the
; caller's buffer; otherwise copy the remaining input into the scratch buffer.
mov esi, [esp+44] |
| 237 |
mov ecx, [esp+20] |
| 238 |
cmp ecx,esi |
| 239 |
ja L_align_long |
| 240 |
|
| 241 |
; ecx = limit + 11 - input pointer = bytes available; eax = 12 - that count.
; Copy the available bytes to [esp+28], zero-fill the rest of the 12 bytes,
; then make both the input pointer and the input limit point at the scratch buffer.
add ecx,11 |
| 242 |
sub ecx,esi |
| 243 |
mov eax,12 |
| 244 |
sub eax,ecx |
| 245 |
lea edi, [esp+28] |
| 246 |
rep movsb |
| 247 |
mov ecx,eax |
| 248 |
xor eax,eax |
| 249 |
rep stosb |
| 250 |
lea esi, [esp+28] |
| 251 |
mov [esp+20],esi |
| 252 |
jmp L_is_aligned |
| 253 |
|
| 254 |
|
| 255 |
; Consume single bytes into the accumulator (placed above the ebx valid bits,
; 8 bits added per byte) until the input pointer is a multiple of 4.
L_align_long: |
| 256 |
test esi,3 |
| 257 |
jz L_is_aligned |
| 258 |
xor eax,eax |
| 259 |
mov al, [esi] |
| 260 |
inc esi |
| 261 |
mov ecx,ebx |
| 262 |
add ebx,8 |
| 263 |
shl eax,cl |
| 264 |
or ebp,eax |
| 265 |
jmp L_align_long |
| 266 |
|
| 267 |
; edi = output pointer for the decode loops
L_is_aligned: |
| 268 |
mov edi, [esp+60] |
| 269 |
; 366 "inffast.S" |
| 270 |
; Pick the decode loop from inflate_fast_use_mmx:
;   equal to 2 : jump to the MMX setup
;   above 2    : jump to the integer loop
;   below 2    : probe the CPU, store 2 or 3 in the variable, and check again
L_check_mmx: |
| 271 |
cmp dword ptr [inflate_fast_use_mmx],2 |
| 272 |
je L_init_mmx |
| 273 |
ja L_do_loop |
| 274 |
|
| 275 |
; Save the four registers that cpuid overwrites.
push eax |
| 276 |
push ebx |
| 277 |
push ecx |
| 278 |
push edx |
| 279 |
; Flip EFLAGS bit 21 (0200000h) in the pushed copy, reload the flags, and read
; them back. If the bit did not change, cpuid is treated as unavailable.
pushfd |
| 280 |
mov eax, [esp] |
| 281 |
xor dword ptr [esp],0200000h |
| 282 |
|
| 283 |
|
| 284 |
|
| 285 |
|
| 286 |
popfd |
| 287 |
pushfd |
| 288 |
pop edx |
| 289 |
xor edx,eax |
| 290 |
jz L_dont_use_mmx |
| 291 |
; cpuid leaf 0: the three constants are the ASCII text "Genu" (ebx),
; "ntel" (ecx) and "ineI" (edx), i.e. the vendor string GenuineIntel.
xor eax,eax |
| 292 |
cpuid |
| 293 |
cmp ebx,0756e6547h |
| 294 |
jne L_dont_use_mmx |
| 295 |
cmp ecx,06c65746eh |
| 296 |
jne L_dont_use_mmx |
| 297 |
cmp edx,049656e69h |
| 298 |
jne L_dont_use_mmx |
| 299 |
; cpuid leaf 1: require bits 8..11 of eax to equal 6 and bit 23 of edx
; (0800000h) to be set.
; NOTE(review): these are the family field and the MMX feature flag per the
; CPUID documentation -- confirm.
mov eax,1 |
| 300 |
cpuid |
| 301 |
shr eax,8 |
| 302 |
and eax,15 |
| 303 |
cmp eax,6 |
| 304 |
jne L_dont_use_mmx |
| 305 |
test edx,0800000h |
| 306 |
jnz L_use_mmx |
| 307 |
jmp L_dont_use_mmx |
| 308 |
L_use_mmx: |
| 309 |
mov dword ptr [inflate_fast_use_mmx],2 |
| 310 |
jmp L_check_mmx_pop |
| 311 |
L_dont_use_mmx: |
| 312 |
mov dword ptr [inflate_fast_use_mmx],3 |
| 313 |
; Restore the saved registers and re-run the selection with the stored result.
L_check_mmx_pop: |
| 314 |
pop edx |
| 315 |
pop ecx |
| 316 |
pop ebx |
| 317 |
pop eax |
| 318 |
jmp L_check_mmx |
| 319 |
; 426 "inffast.S" |
| 320 |
; Integer decode loop.
; Registers: ebp = bit accumulator, bl = number of valid bits in it,
;            esi = input pointer, edi = output pointer.
ALIGN 4 |
| 321 |
L_do_loop: |
| 322 |
; 437 "inffast.S" |
| 323 |
; With 15 or fewer bits buffered, read 16 more input bits and place them
; above the bl bits already held.
cmp bl,15 |
| 324 |
ja L_get_length_code |
| 325 |
|
| 326 |
xor eax,eax |
| 327 |
lodsw |
| 328 |
mov cl,bl |
| 329 |
add bl,16 |
| 330 |
shl eax,cl |
| 331 |
or ebp,eax |
| 332 |
|
| 333 |
; Look up the length table ([esp+8], 4-byte entries) with the accumulator bits
; selected by the mask at [esp+0].
L_get_length_code: |
| 334 |
mov edx, [esp+0] |
| 335 |
mov ecx, [esp+8] |
| 336 |
and edx,ebp |
| 337 |
mov eax, [ecx+edx*4] |
| 338 |
|
| 339 |
; Table entry as used here: al = operation byte, ah = number of bits the code
; consumes, upper 16 bits = value.
L_dolen: |
| 340 |
|
| 341 |
|
| 342 |
|
| 343 |
|
| 344 |
|
| 345 |
|
| 346 |
; Drop the consumed code bits from the accumulator and the bit count.
mov cl,ah |
| 347 |
sub bl,ah |
| 348 |
shr ebp,cl |
| 349 |
|
| 350 |
|
| 351 |
|
| 352 |
|
| 353 |
|
| 354 |
|
| 355 |
; Operation byte 0: the value is a literal; store its low byte to the output.
test al,al |
| 356 |
jnz L_test_for_length_base |
| 357 |
|
| 358 |
shr eax,16 |
| 359 |
stosb |
| 360 |
|
| 361 |
; Continue only while the output pointer is below the limit at [esp+16]
; and the input pointer is below the limit at [esp+20].
L_while_test: |
| 362 |
|
| 363 |
|
| 364 |
cmp [esp+16],edi |
| 365 |
jbe L_break_loop |
| 366 |
|
| 367 |
cmp [esp+20],esi |
| 368 |
ja L_do_loop |
| 369 |
jmp L_break_loop |
| 370 |
|
| 371 |
; Non-literal length entry (eax = table entry, ebp/bl = bit accumulator/count).
; edx = value from the upper 16 bits, cl = operation byte.
L_test_for_length_base: |
| 372 |
; 502 "inffast.S" |
| 373 |
mov edx,eax |
| 374 |
shr edx,16 |
| 375 |
mov cl,al |
| 376 |
|
| 377 |
; Operation bit 16 clear: not a length base, handle it as a second-level
; lookup or end of block.
test al,16 |
| 378 |
jz L_test_for_second_level_length |
| 379 |
; Low four operation bits = number of extra bits; zero means edx is final.
and cl,15 |
| 380 |
jz L_save_len |
| 381 |
cmp bl,cl |
| 382 |
jae L_add_bits_to_len |
| 383 |
|
| 384 |
; Too few bits buffered: read 16 more. ch keeps the extra-bit count while cl
; is used as the shift amount.
mov ch,cl |
| 385 |
xor eax,eax |
| 386 |
lodsw |
| 387 |
mov cl,bl |
| 388 |
add bl,16 |
| 389 |
shl eax,cl |
| 390 |
or ebp,eax |
| 391 |
mov cl,ch |
| 392 |
|
| 393 |
; edx += low cl bits of the accumulator; those bits are then consumed.
L_add_bits_to_len: |
| 394 |
mov eax,1 |
| 395 |
shl eax,cl |
| 396 |
dec eax |
| 397 |
sub bl,cl |
| 398 |
and eax,ebp |
| 399 |
shr ebp,cl |
| 400 |
add edx,eax |
| 401 |
|
| 402 |
; Keep the match length in the frame; edx is reused for the distance next.
L_save_len: |
| 403 |
mov [esp+24],edx |
| 404 |
|
| 405 |
|
| 406 |
; Decode the distance that follows a length
; (ebp/bl = bit accumulator/count, esi = input pointer).
L_decode_distance: |
| 407 |
; 549 "inffast.S" |
| 408 |
; With 15 or fewer bits buffered, read 16 more input bits.
cmp bl,15 |
| 409 |
ja L_get_distance_code |
| 410 |
|
| 411 |
xor eax,eax |
| 412 |
lodsw |
| 413 |
mov cl,bl |
| 414 |
add bl,16 |
| 415 |
shl eax,cl |
| 416 |
or ebp,eax |
| 417 |
|
| 418 |
; Look up the distance table ([esp+12], 4-byte entries) with the accumulator
; bits selected by the mask at [esp+4].
L_get_distance_code: |
| 419 |
mov edx, [esp+4] |
| 420 |
mov ecx, [esp+12] |
| 421 |
and edx,ebp |
| 422 |
mov eax, [ecx+edx*4] |
| 423 |
|
| 424 |
|
| 425 |
; edx = value (upper 16 bits of the entry); consume the ah code bits.
L_dodist: |
| 426 |
mov edx,eax |
| 427 |
shr edx,16 |
| 428 |
mov cl,ah |
| 429 |
sub bl,ah |
| 430 |
shr ebp,cl |
| 431 |
; 584 "inffast.S" |
| 432 |
mov cl,al |
| 433 |
|
| 434 |
; Operation bit 16 clear: second-level lookup or invalid code.
test al,16 |
| 435 |
jz L_test_for_second_level_dist |
| 436 |
; Low four operation bits = number of extra bits; zero goes to the
; distance-equals-one check.
and cl,15 |
| 437 |
jz L_check_dist_one |
| 438 |
cmp bl,cl |
| 439 |
jae L_add_bits_to_dist |
| 440 |
|
| 441 |
; Too few bits buffered: read 16 more (ch preserves the extra-bit count).
mov ch,cl |
| 442 |
xor eax,eax |
| 443 |
lodsw |
| 444 |
mov cl,bl |
| 445 |
add bl,16 |
| 446 |
shl eax,cl |
| 447 |
or ebp,eax |
| 448 |
mov cl,ch |
| 449 |
|
| 450 |
; Add the distance extra bits, then copy the match.
; In:  edx = distance base, cl = number of extra bits, ebp/bl = bit
;      accumulator/count, esi = input pointer, edi = output pointer,
;      [esp+24] = match length, [esp+40] = output base pointer.
; The original ended L_add_bits_to_dist with "jmp L_check_window", a jump to
; the very next instruction; it is removed and control simply falls through.
L_add_bits_to_dist:
        mov     eax,1
        shl     eax,cl
        dec     eax                     ; eax = mask of cl low bits
        sub     bl,cl                   ; consume the extra bits
        and     eax,ebp
        shr     ebp,cl
        add     edx,eax                 ; edx = full distance
        ; fall through to L_check_window

L_check_window:
; 625 "inffast.S"
        mov     [esp+44],esi            ; esi is needed as the copy source
        mov     eax,edi
        sub     eax, [esp+40]           ; eax = bytes between output base and edi

        cmp     eax,edx
        jb      L_clip_window           ; distance reaches before the output base

        mov     ecx, [esp+24]           ; ecx = match length
        mov     esi,edi
        sub     esi,edx                 ; esi = edi - distance

        ; The first three bytes are copied with explicit moves in this exact
        ; load/store order, the remaining length-3 bytes with rep movsb.
        sub     ecx,3
        mov     al, [esi]
        mov     [edi],al
        mov     al, [esi+1]
        mov     dl, [esi+2]
        add     esi,3
        mov     [edi+1],al
        mov     [edi+2],dl
        add     edi,3
        rep movsb

        mov     esi, [esp+44]           ; restore the input pointer
        jmp     L_while_test
| 486 |
|
| 487 |
; Distance entry without extra bits (edx = distance, edi = output pointer).
; A distance of exactly 1 is handled here as a run of the byte just before edi,
; unless edi equals the output base at [esp+40]; every other case goes to
; L_check_window.
ALIGN 4 |
| 488 |
L_check_dist_one: |
| 489 |
cmp edx,1 |
| 490 |
jne L_check_window |
| 491 |
cmp [esp+40],edi |
| 492 |
je L_check_window |
| 493 |
|
| 494 |
; al = previous output byte, ecx = match length - 3
dec edi |
| 495 |
mov ecx, [esp+24] |
| 496 |
mov al, [edi] |
| 497 |
sub ecx,3 |
| 498 |
|
| 499 |
; Three explicit stores, then rep stosb writes the remaining ecx copies.
mov [edi+1],al |
| 500 |
mov [edi+2],al |
| 501 |
mov [edi+3],al |
| 502 |
add edi,4 |
| 503 |
rep stosb |
| 504 |
|
| 505 |
jmp L_while_test |
| 506 |
|
| 507 |
; Length entry whose operation bit 16 is clear.
; In: al/cl = operation byte, edx = value, ebp = bit accumulator.
; Operation bit 64 set: go to the end-of-block test. Otherwise index the length
; table again at (value + low cl bits of the accumulator) and re-enter L_dolen
; with the new entry.
ALIGN 4 |
| 508 |
L_test_for_second_level_length: |
| 509 |
|
| 510 |
|
| 511 |
|
| 512 |
|
| 513 |
test al,64 |
| 514 |
jnz L_test_for_end_of_block |
| 515 |
|
| 516 |
mov eax,1 |
| 517 |
shl eax,cl |
| 518 |
dec eax |
| 519 |
and eax,ebp |
| 520 |
add eax,edx |
| 521 |
mov edx, [esp+8] |
| 522 |
mov eax, [edx+eax*4] |
| 523 |
jmp L_dolen |
| 524 |
|
| 525 |
; Distance entry whose operation bit 16 is clear.
; Operation bit 64 set: invalid distance code. Otherwise index the distance
; table again at (value + low cl bits of the accumulator) and re-enter L_dodist.
ALIGN 4 |
| 526 |
L_test_for_second_level_dist: |
| 527 |
|
| 528 |
|
| 529 |
|
| 530 |
|
| 531 |
test al,64 |
| 532 |
jnz L_invalid_distance_code |
| 533 |
|
| 534 |
mov eax,1 |
| 535 |
shl eax,cl |
| 536 |
dec eax |
| 537 |
and eax,ebp |
| 538 |
add eax,edx |
| 539 |
mov edx, [esp+12] |
| 540 |
mov eax, [edx+eax*4] |
| 541 |
jmp L_dodist |
| 542 |
|
| 543 |
ALIGN 4 |
| 544 |
; Match whose distance reaches before the output base (integer loop).
; In:  eax = edi - output base, edx = distance, edi = output pointer,
;      [esp+24] = match length, [esp+48] = write, [esp+52] = wsize,
;      [esp+56] = window, [esp+44] = saved input pointer.
; Out: match copied to edi, esi restored from [esp+44], continues at L_while_test.
; The original carried a second, unlabeled copy of the "length versus window
; bytes" sequence directly after an unconditional jmp; it could never execute
; and has been removed. All reachable instructions are unchanged.
L_clip_window:
; 721 "inffast.S"
        mov     ecx,eax
        mov     eax, [esp+52]           ; eax = wsize
        neg     ecx
        mov     esi, [esp+56]           ; esi = window

        cmp     eax,edx
        jb      L_invalid_distance_too_far      ; distance larger than wsize

        add     ecx,edx                 ; ecx = distance - (edi - base) = bytes taken from the window
        cmp     dword ptr [esp+48],0
        jne     L_wrap_around_window

        ; write == 0: the wanted bytes are the last ecx bytes of the window.
        sub     eax,ecx
        add     esi,eax                 ; esi = window + wsize - ecx
; 749 "inffast.S"
        mov     eax, [esp+24]           ; eax = match length
        cmp     eax,ecx
        jbe     L_do_copy1              ; whole match comes from the window

        sub     eax,ecx                 ; eax = bytes left after the window part
        rep movsb
        mov     esi,edi
        sub     esi,edx                 ; continue from edi - distance
        jmp     L_do_copy1

L_wrap_around_window:
; 793 "inffast.S"
        mov     eax, [esp+48]           ; eax = write
        cmp     ecx,eax
        jbe     L_contiguous_in_window

        ; More bytes wanted than lie below write: start near the window end.
        add     esi, [esp+52]
        add     esi,eax
        sub     esi,ecx                 ; esi = window + wsize + write - ecx
        sub     ecx,eax                 ; ecx = bytes up to the window end

        mov     eax, [esp+24]
        cmp     eax,ecx
        jbe     L_do_copy1

        sub     eax,ecx
        rep movsb                       ; tail of the window
        mov     esi, [esp+56]
        mov     ecx, [esp+48]           ; then up to write bytes from the window start
        cmp     eax,ecx
        jbe     L_do_copy1

        sub     eax,ecx
        rep movsb
        mov     esi,edi
        sub     esi,edx                 ; rest from edi - distance
        jmp     L_do_copy1

L_contiguous_in_window:
; 836 "inffast.S"
        add     esi,eax
        sub     esi,ecx                 ; esi = window + write - ecx

        mov     eax, [esp+24]
        cmp     eax,ecx
        jbe     L_do_copy1

        sub     eax,ecx
        rep movsb
        mov     esi,edi
        sub     esi,edx                 ; rest from edi - distance

L_do_copy1:
; 862 "inffast.S"
        mov     ecx,eax                 ; eax = bytes still to copy
        rep movsb

        mov     esi, [esp+44]           ; restore the input pointer
        jmp     L_while_test
| 631 |
; 878 "inffast.S" |
| 632 |
; Set up the MMX decode loop from the integer register state:
;   mm0 = bit accumulator (from ebp), ebp = bit count (from ebx),
;   mm4 and mm3 = length-table mask from [esp+0] (mm3 keeps the spare copy),
;   mm5 and mm2 = distance-table mask from [esp+4] (mm2 keeps the spare copy),
;   mm1 = 0 (shift count applied at the top of the loop),
;   ebx = length table pointer from [esp+8].
ALIGN 4 |
| 633 |
L_init_mmx: |
| 634 |
emms |
| 635 |
|
| 636 |
|
| 637 |
|
| 638 |
|
| 639 |
|
| 640 |
movd mm0,ebp |
| 641 |
mov ebp,ebx |
| 642 |
; 896 "inffast.S" |
| 643 |
movd mm4,dword ptr [esp+0] |
| 644 |
movq mm3,mm4 |
| 645 |
movd mm5,dword ptr [esp+4] |
| 646 |
movq mm2,mm5 |
| 647 |
pxor mm1,mm1 |
| 648 |
mov ebx, [esp+8] |
| 649 |
jmp L_do_loop_mmx |
| 650 |
|
| 651 |
; MMX decode loop.
; Registers: mm0 = bit accumulator, mm1 = number of bits still to be shifted
; out of mm0, ebp = bit count, ebx = length table, esi = input, edi = output.
ALIGN 4 |
| 652 |
L_do_loop_mmx: |
| 653 |
; Apply the pending shift left over from the previous code.
psrlq mm0,mm1 |
| 654 |
|
| 655 |
; With 32 or fewer bits buffered, read 32 more input bits and place them
; above the ebp bits already held.
cmp ebp,32 |
| 656 |
ja L_get_length_code_mmx |
| 657 |
|
| 658 |
movd mm6,ebp |
| 659 |
movd mm7,dword ptr [esi] |
| 660 |
add esi,4 |
| 661 |
psllq mm7,mm6 |
| 662 |
add ebp,32 |
| 663 |
por mm0,mm7 |
| 664 |
|
| 665 |
; eax = length table entry indexed by (accumulator AND mask); mm4 is reloaded
; from the spare mask copy in mm3 afterwards.
L_get_length_code_mmx: |
| 666 |
pand mm4,mm0 |
| 667 |
movd eax,mm4 |
| 668 |
movq mm4,mm3 |
| 669 |
mov eax, [ebx+eax*4] |
| 670 |
|
| 671 |
; Entry as used here: al = operation byte, ah = code bit count, upper 16 bits
; = value. The ah bits are subtracted from the count now; the accumulator
; shift is deferred through mm1.
L_dolen_mmx: |
| 672 |
movzx ecx,ah |
| 673 |
movd mm1,ecx |
| 674 |
sub ebp,ecx |
| 675 |
|
| 676 |
; Operation byte 0: literal; store the low byte of the value to the output.
test al,al |
| 677 |
jnz L_test_for_length_base_mmx |
| 678 |
|
| 679 |
shr eax,16 |
| 680 |
stosb |
| 681 |
|
| 682 |
; Continue only while the output pointer is below the limit at [esp+16]
; and the input pointer is below the limit at [esp+20].
L_while_test_mmx: |
| 683 |
|
| 684 |
|
| 685 |
cmp [esp+16],edi |
| 686 |
jbe L_break_loop |
| 687 |
|
| 688 |
cmp [esp+20],esi |
| 689 |
ja L_do_loop_mmx |
| 690 |
jmp L_break_loop |
| 691 |
|
| 692 |
; Non-literal length entry: edx = value.
L_test_for_length_base_mmx: |
| 693 |
|
| 694 |
mov edx,eax |
| 695 |
shr edx,16 |
| 696 |
|
| 697 |
; Operation bit 16 clear: second-level lookup or end of block.
test al,16 |
| 698 |
jz L_test_for_second_level_length_mmx |
| 699 |
; eax = number of extra bits; zero means edx is already the match length.
and eax,15 |
| 700 |
jz L_decode_distance_mmx |
| 701 |
|
| 702 |
; Shift out the code bits, then add the low eax bits of the accumulator
; (selected with inflate_fast_mask) to edx; mm1 now holds the extra-bit count
; as the next pending shift.
psrlq mm0,mm1 |
| 703 |
movd mm1,eax |
| 704 |
movd ecx,mm0 |
| 705 |
sub ebp,eax |
| 706 |
and ecx, [inflate_fast_mask+eax*4] |
| 707 |
add edx,ecx |
| 708 |
|
| 709 |
; MMX path: decode the distance that follows a length.
; Registers: mm0 = bit accumulator, mm1 = pending shift count, ebp = bit count,
; edx = match length, esi = input pointer.
L_decode_distance_mmx: |
| 710 |
; Apply the pending shift.
psrlq mm0,mm1 |
| 711 |
|
| 712 |
; With 32 or fewer bits buffered, read 32 more input bits.
cmp ebp,32 |
| 713 |
ja L_get_dist_code_mmx |
| 714 |
|
| 715 |
movd mm6,ebp |
| 716 |
movd mm7,dword ptr [esi] |
| 717 |
add esi,4 |
| 718 |
psllq mm7,mm6 |
| 719 |
add ebp,32 |
| 720 |
por mm0,mm7 |
| 721 |
|
| 722 |
; eax = distance table entry ([esp+12]) indexed by (accumulator AND mask);
; mm5 is reloaded from the spare mask copy in mm2. ebx no longer holds the
; length table from here on.
L_get_dist_code_mmx: |
| 723 |
mov ebx, [esp+12] |
| 724 |
pand mm5,mm0 |
| 725 |
movd eax,mm5 |
| 726 |
movq mm5,mm2 |
| 727 |
mov eax, [ebx+eax*4] |
| 728 |
|
| 729 |
; ebx = value (upper 16 bits), ecx = code bit count taken from ah;
; the accumulator shift is deferred through mm1.
L_dodist_mmx: |
| 730 |
|
| 731 |
movzx ecx,ah |
| 732 |
mov ebx,eax |
| 733 |
shr ebx,16 |
| 734 |
sub ebp,ecx |
| 735 |
movd mm1,ecx |
| 736 |
|
| 737 |
; Operation bit 16 clear: second-level lookup or invalid code.
test al,16 |
| 738 |
jz L_test_for_second_level_dist_mmx |
| 739 |
; eax = number of extra bits; zero goes to the distance-equals-one check.
and eax,15 |
| 740 |
jz L_check_dist_one_mmx |
| 741 |
|
| 742 |
; Shift out the code bits, then add the low eax bits of the accumulator to ebx.
L_add_bits_to_dist_mmx: |
| 743 |
psrlq mm0,mm1 |
| 744 |
movd mm1,eax |
| 745 |
movd ecx,mm0 |
| 746 |
sub ebp,eax |
| 747 |
and ecx, [inflate_fast_mask+eax*4] |
| 748 |
add ebx,ecx |
| 749 |
|
| 750 |
; MMX path: copy a match. ebx = distance, edx = match length, edi = output.
; If the distance reaches before the output base at [esp+40], the window
; variant handles it; otherwise copy from edi - distance.
L_check_window_mmx: |
| 751 |
mov [esp+44],esi |
| 752 |
mov eax,edi |
| 753 |
sub eax, [esp+40] |
| 754 |
|
| 755 |
cmp eax,ebx |
| 756 |
jb L_clip_window_mmx |
| 757 |
|
| 758 |
mov ecx,edx |
| 759 |
mov esi,edi |
| 760 |
sub esi,ebx |
| 761 |
|
| 762 |
; First three bytes with explicit moves (in this exact load/store order),
; the remaining length-3 bytes with rep movsb.
sub ecx,3 |
| 763 |
mov al, [esi] |
| 764 |
mov [edi],al |
| 765 |
mov al, [esi+1] |
| 766 |
mov dl, [esi+2] |
| 767 |
add esi,3 |
| 768 |
mov [edi+1],al |
| 769 |
mov [edi+2],dl |
| 770 |
add edi,3 |
| 771 |
rep movsb |
| 772 |
|
| 773 |
; Restore the input pointer and reload ebx with the length table for the loop.
mov esi, [esp+44] |
| 774 |
mov ebx, [esp+8] |
| 775 |
jmp L_while_test_mmx |
| 776 |
|
| 777 |
; Distance entry without extra bits. A distance of exactly 1 is written as a
; run of the byte just before edi, unless edi equals the output base at [esp+40].
ALIGN 4 |
| 778 |
L_check_dist_one_mmx: |
| 779 |
cmp ebx,1 |
| 780 |
jne L_check_window_mmx |
| 781 |
cmp [esp+40],edi |
| 782 |
je L_check_window_mmx |
| 783 |
|
| 784 |
; al = previous output byte, ecx = match length - 3
dec edi |
| 785 |
mov ecx,edx |
| 786 |
mov al, [edi] |
| 787 |
sub ecx,3 |
| 788 |
|
| 789 |
; Three explicit stores, then rep stosb writes the remaining ecx copies.
mov [edi+1],al |
| 790 |
mov [edi+2],al |
| 791 |
mov [edi+3],al |
| 792 |
add edi,4 |
| 793 |
rep stosb |
| 794 |
|
| 795 |
mov ebx, [esp+8] |
| 796 |
jmp L_while_test_mmx |
| 797 |
|
| 798 |
; MMX path: length entry whose operation bit 16 is clear.
; In: al = operation byte, edx = value, ebx = length table, mm1 = pending shift.
; Operation bit 64 set: go to the end-of-block test. Otherwise shift out the
; pending bits, index the length table at (value + low (al AND 15) bits of the
; accumulator) and re-enter L_dolen_mmx.
ALIGN 4 |
| 799 |
L_test_for_second_level_length_mmx: |
| 800 |
test al,64 |
| 801 |
jnz L_test_for_end_of_block |
| 802 |
|
| 803 |
and eax,15 |
| 804 |
psrlq mm0,mm1 |
| 805 |
movd ecx,mm0 |
| 806 |
and ecx, [inflate_fast_mask+eax*4] |
| 807 |
add ecx,edx |
| 808 |
mov eax, [ebx+ecx*4] |
| 809 |
jmp L_dolen_mmx |
| 810 |
|
| 811 |
; MMX path: distance entry whose operation bit 16 is clear.
; In: al = operation byte, ebx = value, mm1 = pending shift.
; Operation bit 64 set: invalid distance code. Otherwise shift out the pending
; bits, index the distance table ([esp+12]) at (value + low (al AND 15) bits of
; the accumulator) and re-enter L_dodist_mmx.
ALIGN 4 |
| 812 |
L_test_for_second_level_dist_mmx: |
| 813 |
test al,64 |
| 814 |
jnz L_invalid_distance_code |
| 815 |
|
| 816 |
and eax,15 |
| 817 |
psrlq mm0,mm1 |
| 818 |
movd ecx,mm0 |
| 819 |
and ecx, [inflate_fast_mask+eax*4] |
| 820 |
mov eax, [esp+12] |
| 821 |
add ecx,ebx |
| 822 |
mov eax, [eax+ecx*4] |
| 823 |
jmp L_dodist_mmx |
| 824 |
|
| 825 |
ALIGN 4 |
| 826 |
; Match whose distance reaches before the output base (MMX loop).
; In:  eax = edi - output base, ebx = distance, edx = match length,
;      edi = output pointer, [esp+48] = write, [esp+52] = wsize,
;      [esp+56] = window, [esp+44] = saved input pointer.
; Out: match copied to edi, esi restored from [esp+44], ebx reloaded with the
;      length table from [esp+8], continues at L_while_test_mmx.
; The original carried a second, unlabeled copy of the "length versus window
; bytes" sequence directly after an unconditional jmp; it could never execute
; and has been removed. All reachable instructions are unchanged.
L_clip_window_mmx:

        mov     ecx,eax
        mov     eax, [esp+52]           ; eax = wsize
        neg     ecx
        mov     esi, [esp+56]           ; esi = window

        cmp     eax,ebx
        jb      L_invalid_distance_too_far      ; distance larger than wsize

        add     ecx,ebx                 ; ecx = distance - (edi - base) = bytes taken from the window
        cmp     dword ptr [esp+48],0
        jne     L_wrap_around_window_mmx

        ; write == 0: the wanted bytes are the last ecx bytes of the window.
        sub     eax,ecx
        add     esi,eax                 ; esi = window + wsize - ecx

        cmp     edx,ecx
        jbe     L_do_copy1_mmx          ; whole match comes from the window

        sub     edx,ecx                 ; edx = bytes left after the window part
        rep movsb
        mov     esi,edi
        sub     esi,ebx                 ; continue from edi - distance
        jmp     L_do_copy1_mmx

L_wrap_around_window_mmx:

        mov     eax, [esp+48]           ; eax = write
        cmp     ecx,eax
        jbe     L_contiguous_in_window_mmx

        ; More bytes wanted than lie below write: start near the window end.
        add     esi, [esp+52]
        add     esi,eax
        sub     esi,ecx                 ; esi = window + wsize + write - ecx
        sub     ecx,eax                 ; ecx = bytes up to the window end

        cmp     edx,ecx
        jbe     L_do_copy1_mmx

        sub     edx,ecx
        rep movsb                       ; tail of the window
        mov     esi, [esp+56]
        mov     ecx, [esp+48]           ; then up to write bytes from the window start
        cmp     edx,ecx
        jbe     L_do_copy1_mmx

        sub     edx,ecx
        rep movsb
        mov     esi,edi
        sub     esi,ebx                 ; rest from edi - distance
        jmp     L_do_copy1_mmx

L_contiguous_in_window_mmx:

        add     esi,eax
        sub     esi,ecx                 ; esi = window + write - ecx

        cmp     edx,ecx
        jbe     L_do_copy1_mmx

        sub     edx,ecx
        rep movsb
        mov     esi,edi
        sub     esi,ebx                 ; rest from edi - distance

L_do_copy1_mmx:

        mov     ecx,edx                 ; edx = bytes still to copy
        rep movsb

        mov     esi, [esp+44]           ; restore the input pointer
        mov     ebx, [esp+8]            ; ebx = length table again
        jmp     L_while_test_mmx
| 912 |
; 1174 "inffast.S" |
| 913 |
; Exits that change the decoder mode. Each entry loads
;   ecx = address of an error message, or 0 for "no message"
;   edx = new mode value (INFLATE_MODE_BAD or INFLATE_MODE_TYPE)
; and continues at L_update_stream_state, which stores them and leaves through
; L_break_loop.
; Changes from the original: "mov ecx,0" is now "xor ecx,ecx" (no flag is read
; before the following "test ecx,ecx"), and the jmp at the end of
; L_invalid_distance_too_far, which targeted the very next instruction, is
; replaced by a fall-through.
L_invalid_distance_code:
        mov     ecx, invalid_distance_code_msg
        mov     edx,INFLATE_MODE_BAD
        jmp     L_update_stream_state

L_test_for_end_of_block:
        ; Operation bit 32 set: end of block; otherwise the code is invalid.
        test    al,32
        jz      L_invalid_literal_length_code

        xor     ecx,ecx                 ; no message
        mov     edx,INFLATE_MODE_TYPE
        jmp     L_update_stream_state

L_invalid_literal_length_code:
        mov     ecx, invalid_literal_length_code_msg
        mov     edx,INFLATE_MODE_BAD
        jmp     L_update_stream_state

L_invalid_distance_too_far:
        ; Reached from the window-copy code after esi was saved to [esp+44];
        ; reload the input pointer from there.
        mov     esi, [esp+44]
        mov     ecx, invalid_distance_too_far_msg
        mov     edx,INFLATE_MODE_BAD
        ; fall through to L_update_stream_state

L_update_stream_state:

        mov     eax, [esp+88]           ; eax = stream pointer (first argument)
        test    ecx,ecx
        jz      L_skip_msg
        mov     [eax+24],ecx            ; store the message address in the stream
L_skip_msg:
        mov     eax, [eax+28]           ; eax = state pointer
        mov     [eax+mode_state],edx    ; mode field = edx
        jmp     L_break_loop
| 965 |
|
| 966 |
; Common exit of both decode loops: write the decoder position back to the
; stream ([esp+88]) and its state ([strm+28]), then return.
; On entry esi = input pointer, edi = output pointer; the bit count is in ebx
; (integer loop) or ebp (MMX loop), the bit accumulator in ebp or mm0.
ALIGN 4 |
| 967 |
L_break_loop: |
| 968 |
; 1243 "inffast.S" |
| 969 |
; In MMX mode move the bit count from ebp to ebx so the code below is shared.
cmp dword ptr [inflate_fast_use_mmx],2 |
| 970 |
jne L_update_next_in |
| 971 |
|
| 972 |
|
| 973 |
|
| 974 |
mov ebx,ebp |
| 975 |
|
| 976 |
; eax = stream, edx = state. Give the whole unused bytes (bit count / 8) back
; to the input pointer and keep only the remaining bit count (below 8).
L_update_next_in: |
| 977 |
; 1266 "inffast.S" |
| 978 |
mov eax, [esp+88] |
| 979 |
mov ecx,ebx |
| 980 |
mov edx, [eax+28] |
| 981 |
shr ecx,3 |
| 982 |
sub esi,ecx |
| 983 |
shl ecx,3 |
| 984 |
sub ebx,ecx |
| 985 |
; [strm+12] = output pointer; bits field = remaining bit count (copied to ecx
; for the mask computed below).
mov [eax+12],edi |
| 986 |
mov [edx+bits_state],ebx |
| 987 |
mov ecx,ebx |
| 988 |
|
| 989 |
; If the input limit equals the address of the scratch buffer at [esp+28], the
; scratch buffer was the input source: translate esi back into the caller's
; buffer ([strm+0] + offset) and rebuild the limit as [strm+0] + [strm+4] - 11.
lea ebx, [esp+28] |
| 990 |
cmp [esp+20],ebx |
| 991 |
jne L_buf_not_used |
| 992 |
|
| 993 |
sub esi,ebx |
| 994 |
mov ebx, [eax+0] |
| 995 |
mov [esp+20],ebx |
| 996 |
add esi,ebx |
| 997 |
mov ebx, [eax+4] |
| 998 |
sub ebx,11 |
| 999 |
add [esp+20],ebx |
| 1000 |
|
| 1001 |
; [strm+0] = updated input pointer; ebx = (1 shifted left by cl) - 1, the mask
; for the bits that remain valid.
L_buf_not_used: |
| 1002 |
mov [eax+0],esi |
| 1003 |
|
| 1004 |
mov ebx,1 |
| 1005 |
shl ebx,cl |
| 1006 |
dec ebx |
| 1007 |
|
| 1008 |
|
| 1009 |
|
| 1010 |
|
| 1011 |
|
| 1012 |
; MMX mode: apply the shift still pending in mm1, take the low 32 bits of mm0
; as the accumulator, and leave MMX state with emms.
cmp dword ptr [inflate_fast_use_mmx],2 |
| 1013 |
jne L_update_hold |
| 1014 |
|
| 1015 |
|
| 1016 |
|
| 1017 |
psrlq mm0,mm1 |
| 1018 |
movd ebp,mm0 |
| 1019 |
|
| 1020 |
emms |
| 1021 |
|
| 1022 |
; hold field = accumulator AND mask
L_update_hold: |
| 1023 |
|
| 1024 |
|
| 1025 |
|
| 1026 |
and ebp,ebx |
| 1027 |
mov [edx+hold_state],ebp |
| 1028 |
|
| 1029 |
|
| 1030 |
|
| 1031 |
|
| 1032 |
; [strm+4] = (input limit - esi) + 11. Both branches compute this same value
; modulo 2 to the power 32; they differ only in the order of the subtraction.
mov ebx, [esp+20] |
| 1033 |
cmp ebx,esi |
| 1034 |
jbe L_last_is_smaller |
| 1035 |
|
| 1036 |
sub ebx,esi |
| 1037 |
add ebx,11 |
| 1038 |
mov [eax+4],ebx |
| 1039 |
jmp L_fixup_out |
| 1040 |
L_last_is_smaller: |
| 1041 |
sub esi,ebx |
| 1042 |
neg esi |
| 1043 |
add esi,11 |
| 1044 |
mov [eax+4],esi |
| 1045 |
|
| 1046 |
|
| 1047 |
|
| 1048 |
|
| 1049 |
; [strm+16] = (output limit - edi) + 257, computed the same two ways.
L_fixup_out: |
| 1050 |
|
| 1051 |
mov ebx, [esp+16] |
| 1052 |
cmp ebx,edi |
| 1053 |
jbe L_end_is_smaller |
| 1054 |
|
| 1055 |
sub ebx,edi |
| 1056 |
add ebx,257 |
| 1057 |
mov [eax+16],ebx |
| 1058 |
jmp L_done |
| 1059 |
L_end_is_smaller: |
| 1060 |
sub edi,ebx |
| 1061 |
neg edi |
| 1062 |
add edi,257 |
| 1063 |
mov [eax+16],edi |
| 1064 |
|
| 1065 |
|
| 1066 |
|
| 1067 |
|
| 1068 |
|
| 1069 |
; Epilogue: release the 64-byte frame, then restore the flags and registers in
; the reverse order of the prologue pushes.
L_done: |
| 1070 |
add esp,64 |
| 1071 |
popfd |
| 1072 |
pop ebx |
| 1073 |
pop ebp |
| 1074 |
pop esi |
| 1075 |
pop edi |
| 1076 |
ret |
| 1077 |
_inflate_fast endp |
| 1078 |
|
| 1079 |
; Close the code segment and end the module.
_TEXT ends |
| 1080 |
end |