; inffasx64.asm is a hand-tuned assembler version of inffast.c - fast decoding
; version for AMD64 on Windows using the Microsoft C compiler
;
; inffasx64.asm was automatically converted from the AMD64 portion of inffas86.c
; inffasx64.asm is called by inffas8664.c, which contains more information.


; To compile this file, I use the command
;   ml64.exe /Flinffasx64 /c /Zi inffasx64.asm
;   with Microsoft Macro Assembler (x64) for AMD64
;

; This file compiles with Microsoft Macro Assembler (x64) for AMD64
;
;   ml64.exe is supplied with Visual Studio 2005/2008/2010 and the Windows WDK
;
;   (you can get the Windows WDK with ml64 for AMD64 from
;      http://www.microsoft.com/whdc/Devtools/wdk/default.mspx for low price)
;


 
 
 
 
 
 .code

;-----------------------------------------------------------------------
; inffas8664fnc - fast inflate decoding loop (literal/length + distance)
;
; ABI:   Microsoft x64.
; In:    rcx = address of the `ar` state block (first integer argument).
; Out:   in, out, bits, hold and a status word are written back into ar.
;        rax carries no result; on return it holds the restored rsp value.
;
; ar byte offsets touched by this routine (names taken from the inline
; comments below):
;      0  saved rsp (written here)       8  saved rbp (written here)
;     16  in        24  last       32  out       40  beg
;     48  end       56  window     64  lcode     72  dcode
;     80  hold      88  bits       92  wsize     96  write
;    100  lmask    104  dmask     116  status
;
; Status word stored at ar+116 on exit:
;      0  main loop ended (out reached end, or in reached last)
;      1  end-of-block code seen
;      2  invalid literal/length code
;      3  invalid distance code
;      4  invalid distance, too far back
;
; A decoding-table entry is loaded as one dword into eax:
;      al = op,  ah = bits,  bits 16..31 = val
;
; Register roles inside the loop:
;      rsp  = &ar (the real stack pointer is parked in ar+0)
;      rsi  = in            rdi  = out
;      r9   = last          r10  = end
;      rbp  = lcode         r11  = dcode
;      rdx  = hold          bl   = bits
;      r12d = lmask         r13d = dmask
;      r14d = len           r15d = dist
;      rax, rcx, r8 = scratch (r8 also parks `in` while rsi is `from`)
;
; NOTE(review): the callee-saved registers are stored below rsp and rsp is
; then repointed at ar for the whole loop.  The Microsoft x64 convention
; defines no red zone, so those save slots (and the return address) sit
; below the live stack pointer while the loop runs.  Every other general
; purpose register is already in use, so this is left as found - confirm
; it is acceptable for your environment.
;-----------------------------------------------------------------------
inffas8664fnc PROC

; see http://weblogs.asp.net/oldnewthing/archive/2004/01/14/58579.aspx and
; http://msdn.microsoft.com/library/en-us/kmarch/hh/kmarch/64bitAMD_8e951dd2-ee77-4728-8702-55ce4b5dd24a.xml.asp
;
; All registers must be preserved across the call, except for
;   rax, rcx, rdx, r8, r9, r10, and r11, which are scratch.


        ; Save the callee-saved registers this routine overwrites.
        mov     [rsp-8], rsi
        mov     [rsp-16], rdi
        mov     [rsp-24], r12
        mov     [rsp-32], r13
        mov     [rsp-40], r14
        mov     [rsp-48], r15
        mov     [rsp-56], rbx

        mov     rax, rcx           ; /* rax = &ar */

        mov     [rax+8], rbp       ; /* save regs rbp and rsp */
        mov     [rax], rsp

        mov     rsp, rax           ; /* make rsp point to &ar */

        mov     rsi, [rsp+16]      ; /* rsi  = in */
        mov     rdi, [rsp+32]      ; /* rdi  = out */
        mov     r9, [rsp+24]       ; /* r9   = last */
        mov     r10, [rsp+48]      ; /* r10  = end */
        mov     rbp, [rsp+64]      ; /* rbp  = lcode */
        mov     r11, [rsp+72]      ; /* r11  = dcode */
        mov     rdx, [rsp+80]      ; /* rdx  = hold */
        mov     ebx, [rsp+88]      ; /* ebx  = bits */
        mov     r12d, [rsp+100]    ; /* r12d = lmask */
        mov     r13d, [rsp+104]    ; /* r13d = dmask */
                                   ; /* r14d = len */
                                   ; /* r15d = dist */


        cld                        ; /* string ops (lods/stos/movs) count upwards */
        cmp     r10, rdi
        je      L_one_time         ; /* if only one decode left */
        cmp     r9, rsi

        jne     L_do_loop


; Reached when end == out or last == in: top up hold if needed, then decode
; a single literal/length code via L_get_length_code_one_time.
L_one_time:
        mov     r8, r12            ; /* r8 = lmask */
        cmp     bl, 32
        ja      L_get_length_code_one_time

        lodsd                      ; /* eax = *(uint *)in++ */
        mov     cl, bl             ; /* cl = bits, needs it for shifting */
        add     bl, 32             ; /* bits += 32 */
        shl     rax, cl
        or      rdx, rax           ; /* hold |= *((uint *)in)++ << bits */
        jmp     L_get_length_code_one_time

; Loop condition: keep going while out < end and in < last.
ALIGN 4
L_while_test:
        cmp     r10, rdi
        jbe     L_break_loop
        cmp     r9, rsi
        jbe     L_break_loop

L_do_loop:
        mov     r8, r12            ; /* r8 = lmask */
        cmp     bl, 32
        ja      L_get_length_code  ; /* if (32 < bits) */

        lodsd                      ; /* eax = *(uint *)in++ */
        mov     cl, bl             ; /* cl = bits, needs it for shifting */
        add     bl, 32             ; /* bits += 32 */
        shl     rax, cl
        or      rdx, rax           ; /* hold |= *((uint *)in)++ << bits */

L_get_length_code:
        and     r8, rdx            ; /* r8 &= hold */
        mov     eax, [rbp+r8*4]    ; /* eax = lcode[hold & lmask] */

        mov     cl, ah             ; /* cl = this.bits */
        sub     bl, ah             ; /* bits -= this.bits */
        shr     rdx, cl            ; /* hold >>= this.bits */

        test    al, al
        jnz     L_test_for_length_base ; /* if (op != 0) 45.7% */

        ; op == 0: literal.  Emit it, then fall through and decode the next
        ; code straight away, without another refill check.
        mov     r8, r12            ; /* r8 = lmask */
        shr     eax, 16            ; /* output this.val char */
        stosb

L_get_length_code_one_time:
        and     r8, rdx            ; /* r8 &= hold */
        mov     eax, [rbp+r8*4]    ; /* eax = lcode[hold & lmask] */

; Also the re-entry point after a second-level length table lookup.
L_dolen:
        mov     cl, ah             ; /* cl = this.bits */
        sub     bl, ah             ; /* bits -= this.bits */
        shr     rdx, cl            ; /* hold >>= this.bits */

        test    al, al
        jnz     L_test_for_length_base ; /* if (op != 0) 45.7% */

        shr     eax, 16            ; /* output this.val char */
        stosb
        jmp     L_while_test

; op != 0: length base, second-level table link, end of block, or invalid.
ALIGN 4
L_test_for_length_base:
        mov     r14d, eax          ; /* len = this */
        shr     r14d, 16           ; /* len = this.val */
        mov     cl, al             ; /* cl = this.op */

        test    al, 16
        jz      L_test_for_second_level_length ; /* if ((op & 16) == 0) 8% */
        and     cl, 15             ; /* op &= 15 */
        jz      L_decode_distance  ; /* if (!op) */

L_add_bits_to_len:
        sub     bl, cl             ; /* bits -= op */
        xor     eax, eax
        inc     eax
        shl     eax, cl
        dec     eax                ; /* (1 << op) - 1 */
        and     eax, edx           ; /* eax &= hold */
        shr     rdx, cl            ; /* hold >>= op */
        add     r14d, eax          ; /* len += hold & mask[op] */

L_decode_distance:
        mov     r8, r13            ; /* r8 = dmask */
        cmp     bl, 32
        ja      L_get_distance_code ; /* if (32 < bits) */

        lodsd                      ; /* eax = *(uint *)in++ */
        mov     cl, bl             ; /* cl = bits, needs it for shifting */
        add     bl, 32             ; /* bits += 32 */
        shl     rax, cl
        or      rdx, rax           ; /* hold |= *((uint *)in)++ << bits */

L_get_distance_code:
        and     r8, rdx            ; /* r8 &= hold */
        mov     eax, [r11+r8*4]    ; /* eax = dcode[hold & dmask] */

; Also the re-entry point after a second-level distance table lookup.
L_dodist:
        mov     r15d, eax          ; /* dist = this */
        shr     r15d, 16           ; /* dist = this.val */
        mov     cl, ah
        sub     bl, ah             ; /* bits -= this.bits */
        shr     rdx, cl            ; /* hold >>= this.bits */
        mov     cl, al             ; /* cl = this.op */

        test    al, 16             ; /* if ((op & 16) == 0) */
        jz      L_test_for_second_level_dist
        and     cl, 15             ; /* op &= 15 */
        jz      L_check_dist_one   ; /* no extra bits: dist may be 1 */

L_add_bits_to_dist:
        sub     bl, cl             ; /* bits -= op */
        xor     eax, eax
        inc     eax
        shl     eax, cl
        dec     eax                ; /* (1 << op) - 1 */
        and     eax, edx           ; /* eax &= hold */
        shr     rdx, cl            ; /* hold >>= op */
        add     r15d, eax          ; /* dist += hold & ((1 << op) - 1) */

; Copy the match.  If dist reaches back past beg, go through the window.
L_check_window:
        mov     r8, rsi            ; /* save in so from can use its reg */
        mov     rax, rdi
        sub     rax, [rsp+40]      ; /* nbytes = out - beg */

        cmp     eax, r15d
        jb      L_clip_window      ; /* if (dist > nbytes) 4.2% */

        mov     ecx, r14d          ; /* ecx = len */
        mov     rsi, rdi
        sub     rsi, r15           ; /* from = out - dist */

        sar     ecx, 1             ; /* ecx = len / 2, CF = len & 1 */
        jnc     L_copy_two         ; /* if len % 2 == 0 */

        rep     movsw              ; /* copy len/2 words ... */
        mov     al, [rsi]          ; /* ... then the odd trailing byte */
        mov     [rdi], al
        inc     rdi

        mov     rsi, r8            ; /* move in back to %rsi, toss from */
        jmp     L_while_test

L_copy_two:
        rep     movsw
        mov     rsi, r8            ; /* move in back to %rsi, toss from */
        jmp     L_while_test

; dist == 1 with at least one byte already produced: replicate the previous
; output byte len times instead of copying.
ALIGN 4
L_check_dist_one:
        cmp     r15d, 1            ; /* if dist 1, is a memset */
        jne     L_check_window
        cmp     [rsp+40], rdi      ; /* if out == beg, outside window */
        je      L_check_window

        mov     ecx, r14d          ; /* ecx = len */
        mov     al, [rdi-1]
        mov     ah, al             ; /* ax = previous byte, twice */

        sar     ecx, 1             ; /* ecx = len / 2, CF = len & 1 */
        jnc     L_set_two
        mov     [rdi], al          ; /* odd len: store one byte first */
        inc     rdi

L_set_two:
        rep     stosw
        jmp     L_while_test

; (op & 16) == 0 for a length code: either a link to a second-level table
; or, when (op & 64) != 0, end of block / invalid code.
ALIGN 4
L_test_for_second_level_length:
        test    al, 64
        jnz     L_test_for_end_of_block ; /* if ((op & 64) != 0) */

        xor     eax, eax
        inc     eax
        shl     eax, cl
        dec     eax                ; /* (1 << op) - 1 */
        and     eax, edx           ; /* eax &= hold */
        add     eax, r14d          ; /* eax += len */
        mov     eax, [rbp+rax*4]   ; /* eax = lcode[val+(hold&mask[op])]*/
        jmp     L_dolen

; (op & 16) == 0 for a distance code: second-level table link, or invalid
; when (op & 64) != 0.
ALIGN 4
L_test_for_second_level_dist:
        test    al, 64
        jnz     L_invalid_distance_code ; /* if ((op & 64) != 0) */

        xor     eax, eax
        inc     eax
        shl     eax, cl
        dec     eax                ; /* (1 << op) - 1 */
        and     eax, edx           ; /* eax &= hold */
        add     eax, r15d          ; /* eax += dist */
        mov     eax, [r11+rax*4]   ; /* eax = dcode[val+(hold&mask[op])]*/
        jmp     L_dodist

; dist > out - beg: the first (dist - nbytes) bytes come from the window.
; On entry eax = out - beg.
ALIGN 4
L_clip_window:
        mov     ecx, eax           ; /* ecx = nbytes */
        mov     eax, [rsp+92]      ; /* eax = wsize, prepare for dist cmp */
        neg     ecx                ; /* nbytes = -nbytes */

        cmp     eax, r15d
        jb      L_invalid_distance_too_far ; /* if (dist > wsize) */

        add     ecx, r15d          ; /* nbytes = dist - nbytes */
        cmp     dword ptr [rsp+96], 0
        jne     L_wrap_around_window ; /* if (write != 0) */

        mov     rsi, [rsp+56]      ; /* from  = window */
        sub     eax, ecx           ; /* eax  -= nbytes */
        add     rsi, rax           ; /* from += wsize - nbytes */

        mov     eax, r14d          ; /* eax = len */
        cmp     r14d, ecx
        jbe     L_do_copy          ; /* if (len <= nbytes) */

        sub     eax, ecx           ; /* eax -= nbytes */
        rep     movsb              ; /* nbytes from the window ... */
        mov     rsi, rdi
        sub     rsi, r15           ; /* from = &out[ -dist ] */
        jmp     L_do_copy          ; /* ... remainder from the output */

; write != 0.  On entry ecx = nbytes (bytes needed from the window).
ALIGN 4
L_wrap_around_window:
        mov     eax, [rsp+96]      ; /* eax = write */
        cmp     ecx, eax
        jbe     L_contiguous_in_window ; /* if (write >= nbytes) */

        mov     esi, [rsp+92]      ; /* from  = wsize */
        add     rsi, [rsp+56]      ; /* from += window */
        add     rsi, rax           ; /* from += write */
        sub     rsi, rcx           ; /* from -= nbytes */
        sub     ecx, eax           ; /* nbytes -= write */

        mov     eax, r14d          ; /* eax = len */
        cmp     eax, ecx
        jbe     L_do_copy          ; /* if (len <= nbytes) */

        sub     eax, ecx           ; /* len -= nbytes */
        rep     movsb              ; /* tail of the window ... */
        mov     rsi, [rsp+56]      ; /* from = window */
        mov     ecx, [rsp+96]      ; /* nbytes = write */
        cmp     eax, ecx
        jbe     L_do_copy          ; /* if (len <= nbytes) */

        sub     eax, ecx           ; /* len -= nbytes */
        rep     movsb              ; /* ... then the start of the window ... */
        mov     rsi, rdi
        sub     rsi, r15           ; /* from = out - dist */
        jmp     L_do_copy          ; /* ... remainder from the output */

; On entry eax = write, ecx = nbytes, and write >= nbytes.
ALIGN 4
L_contiguous_in_window:
        mov     rsi, [rsp+56]      ; /* rsi = window */
        add     rsi, rax
        sub     rsi, rcx           ; /* from += write - nbytes */

        mov     eax, r14d          ; /* eax = len */
        cmp     eax, ecx
        jbe     L_do_copy          ; /* if (len <= nbytes) */

        sub     eax, ecx           ; /* len -= nbytes */
        rep     movsb
        mov     rsi, rdi
        sub     rsi, r15           ; /* from = out - dist */
        jmp     L_do_copy          ; /* copy the remaining len bytes */

; Final byte copy: eax = bytes still to copy, rsi = from, rdi = out.
ALIGN 4
L_do_copy:
        mov     ecx, eax           ; /* ecx = len */
        rep     movsb

        mov     rsi, r8            ; /* move in back to %rsi, toss from */
        jmp     L_while_test

; (op & 64) != 0 on a length code: end of block when (op & 32) != 0,
; otherwise the code is invalid.
L_test_for_end_of_block:
        test    al, 32
        jz      L_invalid_literal_length_code
        mov     dword ptr [rsp+116], 1
        jmp     L_break_loop_with_status

L_invalid_literal_length_code:
        mov     dword ptr [rsp+116], 2
        jmp     L_break_loop_with_status

L_invalid_distance_code:
        mov     dword ptr [rsp+116], 3
        jmp     L_break_loop_with_status

L_invalid_distance_too_far:
        mov     dword ptr [rsp+116], 4
        jmp     L_break_loop_with_status

L_break_loop:
        mov     dword ptr [rsp+116], 0

L_break_loop_with_status:
; /* put in, out, bits, and hold back into ar and restore rsp */
        mov     [rsp+16], rsi      ; /* in */
        mov     [rsp+32], rdi      ; /* out */
        mov     [rsp+88], ebx      ; /* bits */
        mov     [rsp+80], rdx      ; /* hold */

        mov     rax, [rsp]         ; /* restore rbp and rsp */
        mov     rbp, [rsp+8]
        mov     rsp, rax



        ; Reload the callee-saved registers stored at entry.
        mov     rsi, [rsp-8]
        mov     rdi, [rsp-16]
        mov     r12, [rsp-24]
        mov     r13, [rsp-32]
        mov     r14, [rsp-40]
        mov     r15, [rsp-48]
        mov     rbx, [rsp-56]

        ret     0
; The lines below appear to be left over from the inline-assembly operand
; and clobber list of the C source this file was converted from.
;          :
;          : "m" (ar)
;          : "memory", "%rax", "%rbx", "%rcx", "%rdx", "%rsi", "%rdi",
;            "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15"
;    );

inffas8664fnc   ENDP
;_TEXT  ENDS
END