| 1 | ; inffasx64.asm is a hand tuned assembler version of inffast.c - fast decoding | 
 
 
 
 
 | 2 | ; version for AMD64 on Windows using Microsoft C compiler | 
 
 
 
 
 | 3 | ; | 
 
 
 
 
 | 4 | ; inffasx64.asm was automatically converted from the AMD64 portion of inffas86.c | 
 
 
 
 
 | 5 | ; inffasx64.asm is called by inffas8664.c, which contains more info. | 
 
 
 
 
 | 6 |  | 
 
 
 
 
 | 7 |  | 
 
 
 
 
 | 8 | ; to compile this file, I use option | 
 
 
 
 
 | 9 | ;   ml64.exe /Flinffasx64 /c /Zi inffasx64.asm | 
 
 
 
 
 | 10 | ;   with Microsoft Macro Assembler (x64) for AMD64 | 
 
 
 
 
 | 11 | ; | 
 
 
 
 
 | 12 |  | 
 
 
 
 
 | 13 | ; This file compiles with Microsoft Macro Assembler (x64) for AMD64 | 
 
 
 
 
 | 14 | ; | 
 
 
 
 
 | 15 | ;   ml64.exe is given with Visual Studio 2005/2008/2010 and Windows WDK | 
 
 
 
 
 | 16 | ; | 
 
 
 
 
 | 17 | ;   (you can get Windows WDK with ml64 for AMD64 from | 
 
 
 
 
 | 18 | ;      http://www.microsoft.com/whdc/Devtools/wdk/default.mspx for low price) | 
 
 
 
 
 | 19 | ; | 
 
 
 
 
 | 20 |  | 
 
 
 
 
 | 21 |  | 
 
 
 
 
 | 22 | .code | 
 
 
 
 
 | 23 | inffas8664fnc PROC | 
 
 
 
 
 | 24 |  | 
 
 
 
 
 | 25 | ; see http://weblogs.asp.net/oldnewthing/archive/2004/01/14/58579.aspx and | 
 
 
 
 
 | 26 | ; http://msdn.microsoft.com/library/en-us/kmarch/hh/kmarch/64bitAMD_8e951dd2-ee77-4728-8702-55ce4b5dd24a.xml.asp | 
 
 
 
 
 | 27 | ; | 
 
 
 
 
 | 28 | ; All registers must be preserved across the call, except for | 
 
 
 
 
 | 29 | ;   rax, rcx, rdx, r8, r-9, r10, and r11, which are scratch. | 
 
 
 
 
 | 30 |  | 
 
 
 
 
 | 31 |  | 
 
 
 
 
 | 32 | mov [rsp-8],rsi | 
 
 
 
 
 | 33 | mov [rsp-16],rdi | 
 
 
 
 
 | 34 | mov [rsp-24],r12 | 
 
 
 
 
 | 35 | mov [rsp-32],r13 | 
 
 
 
 
 | 36 | mov [rsp-40],r14 | 
 
 
 
 
 | 37 | mov [rsp-48],r15 | 
 
 
 
 
 | 38 | mov [rsp-56],rbx | 
 
 
 
 
 | 39 |  | 
 
 
 
 
 | 40 | mov rax,rcx | 
 
 
 
 
 | 41 |  | 
 
 
 
 
 | 42 | mov     [rax+8], rbp       ; /* save regs rbp and rsp */ | 
 
 
 
 
 | 43 | mov     [rax], rsp | 
 
 
 
 
 | 44 |  | 
 
 
 
 
 | 45 | mov     rsp, rax          ; /* make rsp point to &ar */ | 
 
 
 
 
 | 46 |  | 
 
 
 
 
 | 47 | mov     rsi, [rsp+16]      ; /* rsi  = in */ | 
 
 
 
 
 | 48 | mov     rdi, [rsp+32]      ; /* rdi  = out */ | 
 
 
 
 
 | 49 | mov     r9, [rsp+24]       ; /* r9   = last */ | 
 
 
 
 
 | 50 | mov     r10, [rsp+48]      ; /* r10  = end */ | 
 
 
 
 
 | 51 | mov     rbp, [rsp+64]      ; /* rbp  = lcode */ | 
 
 
 
 
 | 52 | mov     r11, [rsp+72]      ; /* r11  = dcode */ | 
 
 
 
 
 | 53 | mov     rdx, [rsp+80]      ; /* rdx  = hold */ | 
 
 
 
 
 | 54 | mov     ebx, [rsp+88]      ; /* ebx  = bits */ | 
 
 
 
 
 | 55 | mov     r12d, [rsp+100]    ; /* r12d = lmask */ | 
 
 
 
 
 | 56 | mov     r13d, [rsp+104]    ; /* r13d = dmask */ | 
 
 
 
 
 | 57 | ; /* r14d = len */ | 
 
 
 
 
 | 58 | ; /* r15d = dist */ | 
 
 
 
 
 | 59 |  | 
 
 
 
 
 | 60 |  | 
 
 
 
 
 | 61 | cld | 
 
 
 
 
 | 62 | cmp     r10, rdi | 
 
 
 
 
 | 63 | je      L_one_time           ; /* if only one decode left */ | 
 
 
 
 
 | 64 | cmp     r9, rsi | 
 
 
 
 
 | 65 |  | 
 
 
 
 
 | 66 | jne L_do_loop | 
 
 
 
 
 | 67 |  | 
 
 
 
 
 | 68 |  | 
 
 
 
 
 | 69 | L_one_time: | 
 
 
 
 
 | 70 | mov     r8, r12           ; /* r8 = lmask */ | 
 
 
 
 
 | 71 | cmp     bl, 32 | 
 
 
 
 
 | 72 | ja      L_get_length_code_one_time | 
 
 
 
 
 | 73 |  | 
 
 
 
 
 | 74 | lodsd                         ; /* eax = *(uint *)in++ */ | 
 
 
 
 
 | 75 | mov     cl, bl            ; /* cl = bits, needs it for shifting */ | 
 
 
 
 
 | 76 | add     bl, 32             ; /* bits += 32 */ | 
 
 
 
 
 | 77 | shl     rax, cl | 
 
 
 
 
 | 78 | or      rdx, rax          ; /* hold |= *((uint *)in)++ << bits */ | 
 
 
 
 
 | 79 | jmp     L_get_length_code_one_time | 
 
 
 
 
 | 80 |  | 
 
 
 
 
 | 81 | ALIGN 4 | 
 
 
 
 
 | 82 | L_while_test: | 
 
 
 
 
 | 83 | cmp     r10, rdi | 
 
 
 
 
 | 84 | jbe     L_break_loop | 
 
 
 
 
 | 85 | cmp     r9, rsi | 
 
 
 
 
 | 86 | jbe     L_break_loop | 
 
 
 
 
 | 87 |  | 
 
 
 
 
 | 88 | L_do_loop: | 
 
 
 
 
 | 89 | mov     r8, r12           ; /* r8 = lmask */ | 
 
 
 
 
 | 90 | cmp     bl, 32 | 
 
 
 
 
 | 91 | ja      L_get_length_code    ; /* if (32 < bits) */ | 
 
 
 
 
 | 92 |  | 
 
 
 
 
 | 93 | lodsd                         ; /* eax = *(uint *)in++ */ | 
 
 
 
 
 | 94 | mov     cl, bl            ; /* cl = bits, needs it for shifting */ | 
 
 
 
 
 | 95 | add     bl, 32             ; /* bits += 32 */ | 
 
 
 
 
 | 96 | shl     rax, cl | 
 
 
 
 
 | 97 | or      rdx, rax          ; /* hold |= *((uint *)in)++ << bits */ | 
 
 
 
 
 | 98 |  | 
 
 
 
 
 | 99 | L_get_length_code: | 
 
 
 
 
 | 100 | and     r8, rdx            ; /* r8 &= hold */ | 
 
 
 
 
 | 101 | mov     eax, [rbp+r8*4]  ; /* eax = lcode[hold & lmask] */ | 
 
 
 
 
 | 102 |  | 
 
 
 
 
 | 103 | mov     cl, ah            ; /* cl = this.bits */ | 
 
 
 
 
 | 104 | sub     bl, ah            ; /* bits -= this.bits */ | 
 
 
 
 
 | 105 | shr     rdx, cl           ; /* hold >>= this.bits */ | 
 
 
 
 
 | 106 |  | 
 
 
 
 
 | 107 | test    al, al | 
 
 
 
 
 | 108 | jnz     L_test_for_length_base ; /* if (op != 0) 45.7% */ | 
 
 
 
 
 | 109 |  | 
 
 
 
 
 | 110 | mov     r8, r12            ; /* r8 = lmask */ | 
 
 
 
 
 | 111 | shr     eax, 16            ; /* output this.val char */ | 
 
 
 
 
 | 112 | stosb | 
 
 
 
 
 | 113 |  | 
 
 
 
 
 | 114 | L_get_length_code_one_time: | 
 
 
 
 
 | 115 | and     r8, rdx            ; /* r8 &= hold */ | 
 
 
 
 
 | 116 | mov     eax, [rbp+r8*4] ; /* eax = lcode[hold & lmask] */ | 
 
 
 
 
 | 117 |  | 
 
 
 
 
 | 118 | L_dolen: | 
 
 
 
 
 | 119 | mov     cl, ah            ; /* cl = this.bits */ | 
 
 
 
 
 | 120 | sub     bl, ah            ; /* bits -= this.bits */ | 
 
 
 
 
 | 121 | shr     rdx, cl           ; /* hold >>= this.bits */ | 
 
 
 
 
 | 122 |  | 
 
 
 
 
 | 123 | test    al, al | 
 
 
 
 
 | 124 | jnz     L_test_for_length_base ; /* if (op != 0) 45.7% */ | 
 
 
 
 
 | 125 |  | 
 
 
 
 
 | 126 | shr     eax, 16            ; /* output this.val char */ | 
 
 
 
 
 | 127 | stosb | 
 
 
 
 
 | 128 | jmp     L_while_test | 
 
 
 
 
 | 129 |  | 
 
 
 
 
 | 130 | ALIGN 4 | 
 
 
 
 
 | 131 | L_test_for_length_base: | 
 
 
 
 
 | 132 | mov     r14d, eax         ; /* len = this */ | 
 
 
 
 
 | 133 | shr     r14d, 16           ; /* len = this.val */ | 
 
 
 
 
 | 134 | mov     cl, al | 
 
 
 
 
 | 135 |  | 
 
 
 
 
 | 136 | test    al, 16 | 
 
 
 
 
 | 137 | jz      L_test_for_second_level_length ; /* if ((op & 16) == 0) 8% */ | 
 
 
 
 
 | 138 | and     cl, 15             ; /* op &= 15 */ | 
 
 
 
 
 | 139 | jz      L_decode_distance    ; /* if (!op) */ | 
 
 
 
 
 | 140 |  | 
 
 
 
 
 | 141 | L_add_bits_to_len: | 
 
 
 
 
 | 142 | sub     bl, cl | 
 
 
 
 
 | 143 | xor     eax, eax | 
 
 
 
 
 | 144 | inc     eax | 
 
 
 
 
 | 145 | shl     eax, cl | 
 
 
 
 
 | 146 | dec     eax | 
 
 
 
 
 | 147 | and     eax, edx          ; /* eax &= hold */ | 
 
 
 
 
 | 148 | shr     rdx, cl | 
 
 
 
 
 | 149 | add     r14d, eax         ; /* len += hold & mask[op] */ | 
 
 
 
 
 | 150 |  | 
 
 
 
 
 | 151 | L_decode_distance: | 
 
 
 
 
 | 152 | mov     r8, r13           ; /* r8 = dmask */ | 
 
 
 
 
 | 153 | cmp     bl, 32 | 
 
 
 
 
 | 154 | ja      L_get_distance_code  ; /* if (32 < bits) */ | 
 
 
 
 
 | 155 |  | 
 
 
 
 
 | 156 | lodsd                         ; /* eax = *(uint *)in++ */ | 
 
 
 
 
 | 157 | mov     cl, bl            ; /* cl = bits, needs it for shifting */ | 
 
 
 
 
 | 158 | add     bl, 32             ; /* bits += 32 */ | 
 
 
 
 
 | 159 | shl     rax, cl | 
 
 
 
 
 | 160 | or      rdx, rax          ; /* hold |= *((uint *)in)++ << bits */ | 
 
 
 
 
 | 161 |  | 
 
 
 
 
 | 162 | L_get_distance_code: | 
 
 
 
 
 | 163 | and     r8, rdx           ; /* r8 &= hold */ | 
 
 
 
 
 | 164 | mov     eax, [r11+r8*4] ; /* eax = dcode[hold & dmask] */ | 
 
 
 
 
 | 165 |  | 
 
 
 
 
 | 166 | L_dodist: | 
 
 
 
 
 | 167 | mov     r15d, eax         ; /* dist = this */ | 
 
 
 
 
 | 168 | shr     r15d, 16           ; /* dist = this.val */ | 
 
 
 
 
 | 169 | mov     cl, ah | 
 
 
 
 
 | 170 | sub     bl, ah            ; /* bits -= this.bits */ | 
 
 
 
 
 | 171 | shr     rdx, cl           ; /* hold >>= this.bits */ | 
 
 
 
 
 | 172 | mov     cl, al            ; /* cl = this.op */ | 
 
 
 
 
 | 173 |  | 
 
 
 
 
 | 174 | test    al, 16             ; /* if ((op & 16) == 0) */ | 
 
 
 
 
 | 175 | jz      L_test_for_second_level_dist | 
 
 
 
 
 | 176 | and     cl, 15             ; /* op &= 15 */ | 
 
 
 
 
 | 177 | jz      L_check_dist_one | 
 
 
 
 
 | 178 |  | 
 
 
 
 
 | 179 | L_add_bits_to_dist: | 
 
 
 
 
 | 180 | sub     bl, cl | 
 
 
 
 
 | 181 | xor     eax, eax | 
 
 
 
 
 | 182 | inc     eax | 
 
 
 
 
 | 183 | shl     eax, cl | 
 
 
 
 
 | 184 | dec     eax                 ; /* (1 << op) - 1 */ | 
 
 
 
 
 | 185 | and     eax, edx          ; /* eax &= hold */ | 
 
 
 
 
 | 186 | shr     rdx, cl | 
 
 
 
 
 | 187 | add     r15d, eax         ; /* dist += hold & ((1 << op) - 1) */ | 
 
 
 
 
 | 188 |  | 
 
 
 
 
 | 189 | L_check_window: | 
 
 
 
 
 | 190 | mov     r8, rsi           ; /* save in so from can use it's reg */ | 
 
 
 
 
 | 191 | mov     rax, rdi | 
 
 
 
 
 | 192 | sub     rax, [rsp+40]      ; /* nbytes = out - beg */ | 
 
 
 
 
 | 193 |  | 
 
 
 
 
 | 194 | cmp     eax, r15d | 
 
 
 
 
 | 195 | jb      L_clip_window        ; /* if (dist > nbytes) 4.2% */ | 
 
 
 
 
 | 196 |  | 
 
 
 
 
 | 197 | mov     ecx, r14d         ; /* ecx = len */ | 
 
 
 
 
 | 198 | mov     rsi, rdi | 
 
 
 
 
 | 199 | sub     rsi, r15          ; /* from = out - dist */ | 
 
 
 
 
 | 200 |  | 
 
 
 
 
 | 201 | sar     ecx, 1 | 
 
 
 
 
 | 202 | jnc     L_copy_two           ; /* if len % 2 == 0 */ | 
 
 
 
 
 | 203 |  | 
 
 
 
 
 | 204 | rep     movsw | 
 
 
 
 
 | 205 | mov     al, [rsi] | 
 
 
 
 
 | 206 | mov     [rdi], al | 
 
 
 
 
 | 207 | inc     rdi | 
 
 
 
 
 | 208 |  | 
 
 
 
 
 | 209 | mov     rsi, r8           ; /* move in back to %rsi, toss from */ | 
 
 
 
 
 | 210 | jmp     L_while_test | 
 
 
 
 
 | 211 |  | 
 
 
 
 
 | 212 | L_copy_two: | 
 
 
 
 
 | 213 | rep     movsw | 
 
 
 
 
 | 214 | mov     rsi, r8           ; /* move in back to %rsi, toss from */ | 
 
 
 
 
 | 215 | jmp     L_while_test | 
 
 
 
 
 | 216 |  | 
 
 
 
 
 | 217 | ALIGN 4 | 
 
 
 
 
 | 218 | L_check_dist_one: | 
 
 
 
 
 | 219 | cmp     r15d, 1            ; /* if dist 1, is a memset */ | 
 
 
 
 
 | 220 | jne     L_check_window | 
 
 
 
 
 | 221 | cmp     [rsp+40], rdi      ; /* if out == beg, outside window */ | 
 
 
 
 
 | 222 | je      L_check_window | 
 
 
 
 
 | 223 |  | 
 
 
 
 
 | 224 | mov     ecx, r14d         ; /* ecx = len */ | 
 
 
 
 
 | 225 | mov     al, [rdi-1] | 
 
 
 
 
 | 226 | mov     ah, al | 
 
 
 
 
 | 227 |  | 
 
 
 
 
 | 228 | sar     ecx, 1 | 
 
 
 
 
 | 229 | jnc     L_set_two | 
 
 
 
 
 | 230 | mov     [rdi], al | 
 
 
 
 
 | 231 | inc     rdi | 
 
 
 
 
 | 232 |  | 
 
 
 
 
 | 233 | L_set_two: | 
 
 
 
 
 | 234 | rep     stosw | 
 
 
 
 
 | 235 | jmp     L_while_test | 
 
 
 
 
 | 236 |  | 
 
 
 
 
 | 237 | ALIGN 4 | 
 
 
 
 
 | 238 | L_test_for_second_level_length: | 
 
 
 
 
 | 239 | test    al, 64 | 
 
 
 
 
 | 240 | jnz     L_test_for_end_of_block ; /* if ((op & 64) != 0) */ | 
 
 
 
 
 | 241 |  | 
 
 
 
 
 | 242 | xor     eax, eax | 
 
 
 
 
 | 243 | inc     eax | 
 
 
 
 
 | 244 | shl     eax, cl | 
 
 
 
 
 | 245 | dec     eax | 
 
 
 
 
 | 246 | and     eax, edx         ; /* eax &= hold */ | 
 
 
 
 
 | 247 | add     eax, r14d        ; /* eax += len */ | 
 
 
 
 
 | 248 | mov     eax, [rbp+rax*4] ; /* eax = lcode[val+(hold&mask[op])]*/ | 
 
 
 
 
 | 249 | jmp     L_dolen | 
 
 
 
 
 | 250 |  | 
 
 
 
 
 | 251 | ALIGN 4 | 
 
 
 
 
 | 252 | L_test_for_second_level_dist: | 
 
 
 
 
 | 253 | test    al, 64 | 
 
 
 
 
 | 254 | jnz     L_invalid_distance_code ; /* if ((op & 64) != 0) */ | 
 
 
 
 
 | 255 |  | 
 
 
 
 
 | 256 | xor     eax, eax | 
 
 
 
 
 | 257 | inc     eax | 
 
 
 
 
 | 258 | shl     eax, cl | 
 
 
 
 
 | 259 | dec     eax | 
 
 
 
 
 | 260 | and     eax, edx         ; /* eax &= hold */ | 
 
 
 
 
 | 261 | add     eax, r15d        ; /* eax += dist */ | 
 
 
 
 
 | 262 | mov     eax, [r11+rax*4] ; /* eax = dcode[val+(hold&mask[op])]*/ | 
 
 
 
 
 | 263 | jmp     L_dodist | 
 
 
 
 
 | 264 |  | 
 
 
 
 
 | 265 | ALIGN 4 | 
 
 
 
 
 | 266 | L_clip_window: | 
 
 
 
 
 | 267 | mov     ecx, eax         ; /* ecx = nbytes */ | 
 
 
 
 
 | 268 | mov     eax, [rsp+92]     ; /* eax = wsize, prepare for dist cmp */ | 
 
 
 
 
 | 269 | neg     ecx                ; /* nbytes = -nbytes */ | 
 
 
 
 
 | 270 |  | 
 
 
 
 
 | 271 | cmp     eax, r15d | 
 
 
 
 
 | 272 | jb      L_invalid_distance_too_far ; /* if (dist > wsize) */ | 
 
 
 
 
 | 273 |  | 
 
 
 
 
 | 274 | add     ecx, r15d         ; /* nbytes = dist - nbytes */ | 
 
 
 
 
 | 275 | cmp     dword ptr [rsp+96], 0 | 
 
 
 
 
 | 276 | jne     L_wrap_around_window ; /* if (write != 0) */ | 
 
 
 
 
 | 277 |  | 
 
 
 
 
 | 278 | mov     rsi, [rsp+56]     ; /* from  = window */ | 
 
 
 
 
 | 279 | sub     eax, ecx         ; /* eax  -= nbytes */ | 
 
 
 
 
 | 280 | add     rsi, rax         ; /* from += wsize - nbytes */ | 
 
 
 
 
 | 281 |  | 
 
 
 
 
 | 282 | mov     eax, r14d        ; /* eax = len */ | 
 
 
 
 
 | 283 | cmp     r14d, ecx | 
 
 
 
 
 | 284 | jbe     L_do_copy           ; /* if (nbytes >= len) */ | 
 
 
 
 
 | 285 |  | 
 
 
 
 
 | 286 | sub     eax, ecx         ; /* eax -= nbytes */ | 
 
 
 
 
 | 287 | rep     movsb | 
 
 
 
 
 | 288 | mov     rsi, rdi | 
 
 
 
 
 | 289 | sub     rsi, r15         ; /* from = &out[ -dist ] */ | 
 
 
 
 
 | 290 | jmp     L_do_copy | 
 
 
 
 
 | 291 |  | 
 
 
 
 
 | 292 | ALIGN 4 | 
 
 
 
 
 | 293 | L_wrap_around_window: | 
 
 
 
 
 | 294 | mov     eax, [rsp+96]     ; /* eax = write */ | 
 
 
 
 
 | 295 | cmp     ecx, eax | 
 
 
 
 
 | 296 | jbe     L_contiguous_in_window ; /* if (write >= nbytes) */ | 
 
 
 
 
 | 297 |  | 
 
 
 
 
 | 298 | mov     esi, [rsp+92]     ; /* from  = wsize */ | 
 
 
 
 
 | 299 | add     rsi, [rsp+56]     ; /* from += window */ | 
 
 
 
 
 | 300 | add     rsi, rax         ; /* from += write */ | 
 
 
 
 
 | 301 | sub     rsi, rcx         ; /* from -= nbytes */ | 
 
 
 
 
 | 302 | sub     ecx, eax         ; /* nbytes -= write */ | 
 
 
 
 
 | 303 |  | 
 
 
 
 
 | 304 | mov     eax, r14d        ; /* eax = len */ | 
 
 
 
 
 | 305 | cmp     eax, ecx | 
 
 
 
 
 | 306 | jbe     L_do_copy           ; /* if (nbytes >= len) */ | 
 
 
 
 
 | 307 |  | 
 
 
 
 
 | 308 | sub     eax, ecx         ; /* len -= nbytes */ | 
 
 
 
 
 | 309 | rep     movsb | 
 
 
 
 
 | 310 | mov     rsi, [rsp+56]     ; /* from = window */ | 
 
 
 
 
 | 311 | mov     ecx, [rsp+96]     ; /* nbytes = write */ | 
 
 
 
 
 | 312 | cmp     eax, ecx | 
 
 
 
 
 | 313 | jbe     L_do_copy           ; /* if (nbytes >= len) */ | 
 
 
 
 
 | 314 |  | 
 
 
 
 
 | 315 | sub     eax, ecx         ; /* len -= nbytes */ | 
 
 
 
 
 | 316 | rep     movsb | 
 
 
 
 
 | 317 | mov     rsi, rdi | 
 
 
 
 
 | 318 | sub     rsi, r15         ; /* from = out - dist */ | 
 
 
 
 
 | 319 | jmp     L_do_copy | 
 
 
 
 
 | 320 |  | 
 
 
 
 
 | 321 | ALIGN 4 | 
 
 
 
 
 | 322 | L_contiguous_in_window: | 
 
 
 
 
 | 323 | mov     rsi, [rsp+56]     ; /* rsi = window */ | 
 
 
 
 
 | 324 | add     rsi, rax | 
 
 
 
 
 | 325 | sub     rsi, rcx         ; /* from += write - nbytes */ | 
 
 
 
 
 | 326 |  | 
 
 
 
 
 | 327 | mov     eax, r14d        ; /* eax = len */ | 
 
 
 
 
 | 328 | cmp     eax, ecx | 
 
 
 
 
 | 329 | jbe     L_do_copy           ; /* if (nbytes >= len) */ | 
 
 
 
 
 | 330 |  | 
 
 
 
 
 | 331 | sub     eax, ecx         ; /* len -= nbytes */ | 
 
 
 
 
 | 332 | rep     movsb | 
 
 
 
 
 | 333 | mov     rsi, rdi | 
 
 
 
 
 | 334 | sub     rsi, r15         ; /* from = out - dist */ | 
 
 
 
 
 | 335 | jmp     L_do_copy           ; /* if (nbytes >= len) */ | 
 
 
 
 
 | 336 |  | 
 
 
 
 
 | 337 | ALIGN 4 | 
 
 
 
 
 | 338 | L_do_copy: | 
 
 
 
 
 | 339 | mov     ecx, eax         ; /* ecx = len */ | 
 
 
 
 
 | 340 | rep     movsb | 
 
 
 
 
 | 341 |  | 
 
 
 
 
 | 342 | mov     rsi, r8          ; /* move in back to %esi, toss from */ | 
 
 
 
 
 | 343 | jmp     L_while_test | 
 
 
 
 
 | 344 |  | 
 
 
 
 
 | 345 | L_test_for_end_of_block: | 
 
 
 
 
 | 346 | test    al, 32 | 
 
 
 
 
 | 347 | jz      L_invalid_literal_length_code | 
 
 
 
 
 | 348 | mov     dword ptr [rsp+116], 1 | 
 
 
 
 
 | 349 | jmp     L_break_loop_with_status | 
 
 
 
 
 | 350 |  | 
 
 
 
 
 | 351 | L_invalid_literal_length_code: | 
 
 
 
 
 | 352 | mov     dword ptr [rsp+116], 2 | 
 
 
 
 
 | 353 | jmp     L_break_loop_with_status | 
 
 
 
 
 | 354 |  | 
 
 
 
 
 | 355 | L_invalid_distance_code: | 
 
 
 
 
 | 356 | mov     dword ptr [rsp+116], 3 | 
 
 
 
 
 | 357 | jmp     L_break_loop_with_status | 
 
 
 
 
 | 358 |  | 
 
 
 
 
 | 359 | L_invalid_distance_too_far: | 
 
 
 
 
 | 360 | mov     dword ptr [rsp+116], 4 | 
 
 
 
 
 | 361 | jmp     L_break_loop_with_status | 
 
 
 
 
 | 362 |  | 
 
 
 
 
 | 363 | L_break_loop: | 
 
 
 
 
 | 364 | mov     dword ptr [rsp+116], 0 | 
 
 
 
 
 | 365 |  | 
 
 
 
 
 | 366 | L_break_loop_with_status: | 
 
 
 
 
 | 367 | ; /* put in, out, bits, and hold back into ar and pop esp */ | 
 
 
 
 
 | 368 | mov     [rsp+16], rsi     ; /* in */ | 
 
 
 
 
 | 369 | mov     [rsp+32], rdi     ; /* out */ | 
 
 
 
 
 | 370 | mov     [rsp+88], ebx     ; /* bits */ | 
 
 
 
 
 | 371 | mov     [rsp+80], rdx     ; /* hold */ | 
 
 
 
 
 | 372 |  | 
 
 
 
 
 | 373 | mov     rax, [rsp]       ; /* restore rbp and rsp */ | 
 
 
 
 
 | 374 | mov     rbp, [rsp+8] | 
 
 
 
 
 | 375 | mov     rsp, rax | 
 
 
 
 
 | 376 |  | 
 
 
 
 
 | 377 |  | 
 
 
 
 
 | 378 |  | 
 
 
 
 
 | 379 | mov rsi,[rsp-8] | 
 
 
 
 
 | 380 | mov rdi,[rsp-16] | 
 
 
 
 
 | 381 | mov r12,[rsp-24] | 
 
 
 
 
 | 382 | mov r13,[rsp-32] | 
 
 
 
 
 | 383 | mov r14,[rsp-40] | 
 
 
 
 
 | 384 | mov r15,[rsp-48] | 
 
 
 
 
 | 385 | mov rbx,[rsp-56] | 
 
 
 
 
 | 386 |  | 
 
 
 
 
 | 387 | ret 0 | 
 
 
 
 
 | 388 | ;          : | 
 
 
 
 
 | 389 | ;          : "m" (ar) | 
 
 
 
 
 | 390 | ;          : "memory", "%rax", "%rbx", "%rcx", "%rdx", "%rsi", "%rdi", | 
 
 
 
 
 | 391 | ;            "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15" | 
 
 
 
 
 | 392 | ;    ); | 
 
 
 
 
 | 393 |  | 
 
 
 
 
 | 394 | inffas8664fnc   ENDP | 
 
 
 
 
 | 395 | ;_TEXT  ENDS | 
 
 
 
 
 | 396 | END |