You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

559 lines
14 KiB

  1. ;
  2. ; gvmat32.asm -- Asm portion of the optimized longest_match for 32 bits x86
  3. ; Copyright (C) 1995-1996 Jean-loup Gailly and Gilles Vollant.
  4. ; File written by Gilles Vollant, by modifying the longest_match
  5. ; from Jean-loup Gailly in deflate.c
  6. ; It needs wmask == 0x7fff
  7. ; (assembly code is faster with a fixed wmask)
  8. ;
  9. ; For Visual C++ 4.2 and ML 6.11c (version in directory \MASM611C of Win95 DDK)
  10. ; I compile with : "ml /coff /Zi /c gvmat32.asm"
  11. ;
  12. ;uInt longest_match_7fff(s, cur_match)
  13. ; deflate_state *s;
  14. ; IPos cur_match; /* current match */
  ; Stack frame of longest_match_7fff, addressed through esp once the
  ; prologue (4 pushes + "sub esp,NbStackAdd") has run.
  15. NbStack equ 76 ; offset from esp to the cur_match argument after the prologue
  16. cur_match equ dword ptr[esp+NbStack-0] ; second stack argument
  17. str_s equ dword ptr[esp+NbStack-4] ; first stack argument (s)
  18. ; 5 dwords above the locals, from high to low address (ret,ebp,edi,esi,ebx)
  19. adrret equ dword ptr[esp+NbStack-8]
  20. pushebp equ dword ptr[esp+NbStack-12]
  21. pushedi equ dword ptr[esp+NbStack-16]
  22. pushesi equ dword ptr[esp+NbStack-20]
  23. pushebx equ dword ptr[esp+NbStack-24]
  ; local variables (13 dwords, the lowest one is at [esp+0])
  24. chain_length equ dword ptr [esp+NbStack-28]
  25. limit equ dword ptr [esp+NbStack-32]
  26. best_len equ dword ptr [esp+NbStack-36]
  27. window equ dword ptr [esp+NbStack-40]
  28. prev equ dword ptr [esp+NbStack-44]
  29. scan_start equ word ptr [esp+NbStack-48] ; only a word: first two bytes of the string at scan
  30. wmask equ dword ptr [esp+NbStack-52] ; slot is named, but longest_match_7fff masks with the constant 7fffh
  31. match_start_ptr equ dword ptr [esp+NbStack-56]
  32. nice_match equ dword ptr [esp+NbStack-60]
  33. scan equ dword ptr [esp+NbStack-64]
  34. windowlen equ dword ptr [esp+NbStack-68]
  35. match_start equ dword ptr [esp+NbStack-72]
  36. strend equ dword ptr [esp+NbStack-76]
  37. NbStackAdd equ (NbStack-24) ; size in bytes of the local area reserved with "sub esp"
  38. .386p
  39. name gvmatch
  40. .MODEL FLAT
  41. ; all the +4 offsets are due to the addition of pending_buf_size to the
  42. ; deflate_state structure of zlib after this asm code was first written
  43. ; (if you compile with zlib 1.0.4 or older, remove the +4).
  44. ; Note : these values are valid for a structure packed on 8-byte boundaries
  ; Byte offsets of the deflate_state fields read through ebp (= s).
  45. dep_chain_length equ 70h+4 ; s->max_chain_length
  46. dep_window equ 2ch+4 ; s->window
  47. dep_strstart equ 60h+4 ; s->strstart
  48. dep_prev_length equ 6ch+4 ; s->prev_length
  49. dep_nice_match equ 84h+4 ; s->nice_match
  50. dep_w_size equ 20h+4 ; window size field, used to compute MAX_DIST(s)
  51. dep_prev equ 34h+4 ; s->prev
  52. dep_w_mask equ 28h+4 ; presumably s->w_mask; not referenced by the code in this file (7fffh is hard-coded)
  53. dep_good_match equ 80h+4 ; s->good_match
  54. dep_match_start equ 64h+4 ; s->match_start
  55. dep_lookahead equ 68h+4 ; s->lookahead
  56. _TEXT segment ; code segment, closed by "_TEXT ends" at the end of the file
  ; When NOUNDERLINE is defined the symbols are declared without the
  ; leading underscore.
  57. IFDEF NOUNDERLINE
  58. public longest_match_7fff
  59. ; public match_init
  60. ELSE
  61. public _longest_match_7fff
  62. ; public _match_init
  63. ENDIF
  64. MAX_MATCH equ 258
  65. MIN_MATCH equ 3
  66. MIN_LOOKAHEAD equ (MAX_MATCH+MIN_MATCH+1) ; = 262
  ; match_init is disabled: both variants below are commented out.
  67. IFDEF NOUNDERLINE
  68. ;match_init proc near
  69. ; ret
  70. ;match_init endp
  71. ELSE
  72. ;_match_init proc near
  73. ; ret
  74. ;_match_init endp
  75. ENDIF
  ; -----------------------------------------------------------------------
  ; longest_match_7fff(s, cur_match)
  ; In:    [esp+4] = s, [esp+8] = cur_match (stack arguments at entry)
  ; Out:   eax = min(best_len, s->lookahead); s->match_start is written
  ; Saved: ebp, edi, esi, ebx are pushed on entry and popped before ret
  ; Uses:  eax, ecx, edx as scratch (not preserved)
  ; Note:  the hash-chain index is masked with the constant 7fffh, so the
  ;        routine is only valid when the window mask is 7fffh.
  ; -----------------------------------------------------------------------
  76. IFDEF NOUNDERLINE
  77. longest_match_7fff proc near
  78. ELSE
  79. _longest_match_7fff proc near
  80. ENDIF
  81. mov edx,[esp+4] ; edx = s, read before the pushes below change esp
  82. push ebp
  83. push edi
  84. push esi
  85. push ebx
  86. sub esp,NbStackAdd ; reserve the local variables addressed through esp
  87. ; initialize or check the variables used in match.asm.
  88. mov ebp,edx ; ebp = s
  89. ; chain_length = s->max_chain_length
  90. ; if (prev_length>=good_match) chain_length >>= 2
  91. mov edx,[ebp+dep_chain_length]
  92. mov ebx,[ebp+dep_prev_length] ; ebx = s->prev_length
  93. cmp [ebp+dep_good_match],ebx
  94. ja noshr ; good_match > prev_length: keep the full chain length
  95. shr edx,2
  96. noshr:
  97. ; we increment chain_length because in the asm, the --chain_length is at the beginning of the loop
  98. inc edx
  99. mov edi,[ebp+dep_nice_match] ; edi = s->nice_match
  100. mov chain_length,edx
  101. mov eax,[ebp+dep_lookahead] ; eax = s->lookahead
  102. cmp eax,edi
  103. ; if ((uInt)nice_match > s->lookahead) nice_match = s->lookahead;
  104. jae nolookaheadnicematch
  105. mov edi,eax
  106. nolookaheadnicematch:
  107. ; best_len = s->prev_length
  108. mov best_len,ebx
  109. ; window = s->window
  110. mov esi,[ebp+dep_window]
  111. mov ecx,[ebp+dep_strstart] ; ecx = s->strstart
  112. mov window,esi
  113. mov nice_match,edi
  114. ; scan = window + strstart
  115. add esi,ecx
  116. mov scan,esi
  117. ; dx = word at scan (esi = scan here)
  118. mov dx,word ptr [esi]
  119. ; bx = word at scan+best_len-1 (ebx still holds best_len)
  120. mov bx,word ptr [esi+ebx-1]
  121. add esi,MAX_MATCH-1
  122. ; scan_start = *scan
  123. mov scan_start,dx
  124. ; strend = scan + MAX_MATCH-1
  125. mov strend,esi
  126. ; bx = scan_end = word at scan+best_len-1
  127. ; IPos limit = s->strstart > (IPos)MAX_DIST(s) ?
  128. ; s->strstart - (IPos)MAX_DIST(s) : NIL;
  129. mov esi,[ebp+dep_w_size]
  130. sub esi,MIN_LOOKAHEAD
  131. ; here esi = MAX_DIST(s)
  132. sub ecx,esi
  133. ja nodist ; strstart > MAX_DIST(s): ecx already holds the difference
  134. xor ecx,ecx ; otherwise limit = 0
  135. nodist:
  136. mov limit,ecx
  137. ; prev = s->prev
  138. mov edx,[ebp+dep_prev]
  139. mov prev,edx
  140. ;
  141. mov edx,dword ptr [ebp+dep_match_start]
  142. mov bp,scan_start ; bp = scan_start; ebp no longer holds s after this
  143. mov eax,cur_match
  144. mov match_start,edx ; the local match_start starts as s->match_start
  145. mov edx,window
  146. mov edi,edx
  147. add edi,best_len
  148. mov esi,prev
  149. dec edi
  150. ; windowlen = window + best_len -1
  151. mov windowlen,edi
  152. jmp beginloop2
  153. align 4
  154. ; here, in the loop
  155. ; eax = ax = cur_match
  156. ; ecx = limit
  157. ; bx = scan_end
  158. ; bp = scan_start
  159. ; edi = windowlen (window + best_len -1)
  160. ; esi = prev
  ; edx = window
  161. ; here at most 16 chain steps remain: one candidate per iteration, not unrolled
  162. normalbeg0add16:
  163. add chain_length,16 ; put back the 16 taken off for an unrolled block
  164. jz exitloop ; no chain step left
  165. normalbeg0:
  166. cmp word ptr[edi+eax],bx ; word at match+best_len-1 == scan_end ?
  167. je normalbeg2noroll
  168. rcontlabnoroll:
  169. ; cur_match = prev[cur_match & wmask]
  170. and eax,7fffh
  171. mov ax,word ptr[esi+eax*2]
  172. ; if cur_match <= limit, go to exitloop
  173. cmp ecx,eax
  174. jnb exitloop
  175. ; if --chain_length != 0, try the next candidate; otherwise exit
  176. dec chain_length
  177. jnz normalbeg0
  178. jmp exitloop
  179. normalbeg2noroll:
  180. ; if (scan_start==*(cur_match+window)) goto normalbeg2
  181. cmp bp,word ptr[edx+eax]
  182. jne rcontlabnoroll
  183. jmp normalbeg2
  184. contloop3:
  185. mov edi,windowlen ; reload edi, it was used as a pointer during the compare
  186. ; cur_match = prev[cur_match & wmask]
  187. and eax,7fffh
  188. mov ax,word ptr[esi+eax*2]
  189. ; if cur_match <= limit, go to exitloop
  190. cmp ecx,eax
  191. jnbexitloopshort1:
  192. jnb exitloop
  193. ; the chain_length update for this step is done by the sub at beginloop2
  194. ; begin the main loop
  195. beginloop2:
  196. sub chain_length,16+1
  197. ; if chain_length <=16, don't use the unrolled loop
  198. jna normalbeg0add16
  199. do16:
  200. cmp word ptr[edi+eax],bx
  201. je normalbeg2dc0
  ; maccn: one unrolled chain step. Follows prev[] to the next candidate,
  ; exits when it is not above limit, and jumps to lab when the word at
  ; match+best_len-1 equals scan_end.
  202. maccn MACRO lab
  203. and eax,7fffh
  204. mov ax,word ptr[esi+eax*2]
  205. cmp ecx,eax
  206. jnb exitloop
  207. cmp word ptr[edi+eax],bx
  208. je lab
  209. ENDM
  210. rcontloop0:
  211. maccn normalbeg2dc1
  212. rcontloop1:
  213. maccn normalbeg2dc2
  214. rcontloop2:
  215. maccn normalbeg2dc3
  216. rcontloop3:
  217. maccn normalbeg2dc4
  218. rcontloop4:
  219. maccn normalbeg2dc5
  220. rcontloop5:
  221. maccn normalbeg2dc6
  222. rcontloop6:
  223. maccn normalbeg2dc7
  224. rcontloop7:
  225. maccn normalbeg2dc8
  226. rcontloop8:
  227. maccn normalbeg2dc9
  228. rcontloop9:
  229. maccn normalbeg2dc10
  230. rcontloop10:
  231. maccn short normalbeg2dc11
  232. rcontloop11:
  233. maccn short normalbeg2dc12
  234. rcontloop12:
  235. maccn short normalbeg2dc13
  236. rcontloop13:
  237. maccn short normalbeg2dc14
  238. rcontloop14:
  239. maccn short normalbeg2dc15
  240. rcontloop15:
  241. and eax,7fffh
  242. mov ax,word ptr[esi+eax*2]
  243. cmp ecx,eax
  244. jnb exitloop
  245. sub chain_length,16 ; a whole unrolled block of 16 chain steps was used
  246. ja do16
  247. jmp normalbeg0add16
  248. ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
  ; normbeg: landing code for a hit in unrolled step number valsub.
  ; rcontlab is the place to resume the unrolled block when the first
  ; word of the candidate does not match.
  249. normbeg MACRO rcontlab,valsub
  250. ; if we are here, we know that *(match+best_len-1) == scan_end
  251. cmp bp,word ptr[edx+eax]
  252. ; if (*match != scan_start) goto rcontlab
  253. jne rcontlab
  254. ; put back the part of the 16-step block not used yet (16-valsub), then compare scan and match
  255. add chain_length,16-valsub
  256. jmp iseq
  257. ENDM
  ; normalbeg2dc11..15 come first: they are the targets reached with "short" jumps
  258. normalbeg2dc11:
  259. normbeg rcontloop11,11
  260. normalbeg2dc12:
  261. normbeg short rcontloop12,12
  262. normalbeg2dc13:
  263. normbeg short rcontloop13,13
  264. normalbeg2dc14:
  265. normbeg short rcontloop14,14
  266. normalbeg2dc15:
  267. normbeg short rcontloop15,15
  268. normalbeg2dc10:
  269. normbeg rcontloop10,10
  270. normalbeg2dc9:
  271. normbeg rcontloop9,9
  272. normalbeg2dc8:
  273. normbeg rcontloop8,8
  274. normalbeg2dc7:
  275. normbeg rcontloop7,7
  276. normalbeg2dc6:
  277. normbeg rcontloop6,6
  278. normalbeg2dc5:
  279. normbeg rcontloop5,5
  280. normalbeg2dc4:
  281. normbeg rcontloop4,4
  282. normalbeg2dc3:
  283. normbeg rcontloop3,3
  284. normalbeg2dc2:
  285. normbeg rcontloop2,2
  286. normalbeg2dc1:
  287. normbeg rcontloop1,1
  288. normalbeg2dc0:
  289. normbeg rcontloop0,0
  290. ; we go in normalbeg2 because *(ushf*)(match+best_len-1) == scan_end
  291. normalbeg2:
  292. mov edi,window
  293. cmp bp,word ptr[edi+eax]
  294. jne contloop3 ; if *(ushf*)match != scan_start, continue
  295. iseq:
  296. ; if we are here, we know that *(match+best_len-1) == scan_end
  297. ; and (*match == scan_start)
  ; NOTE(review): bytes 0-1 were compared through scan_start; byte 2 is not
  ; compared by this code - presumably guaranteed equal by the hash lookup
  ; that produced cur_match; confirm against deflate.c.
  298. mov edi,edx
  299. mov esi,scan ; esi = scan
  300. add edi,eax ; edi = window + cur_match = match
  301. mov edx,[esi+3] ; edx = dword at scan+3
  302. xor edx,[edi+3] ; xor with the dword at match+3: zero when all four bytes are equal
  303. jz begincompare ; if equal, go to long compare
  304. ; find the first differing byte and set len (in esi)
  305. or dl,dl
  306. je eq1rr
  307. mov esi,3 ; byte at offset 3 differs: len = 3
  308. jmp trfinval
  309. eq1rr:
  310. or dx,dx
  311. je eq1
  312. mov esi,4 ; byte at offset 4 differs: len = 4
  313. jmp trfinval
  314. eq1:
  315. and edx,0ffffffh
  316. jz eq11
  317. mov esi,5 ; byte at offset 5 differs: len = 5
  318. jmp trfinval
  319. eq11:
  320. mov esi,6 ; only the byte at offset 6 differs: len = 6
  321. jmp trfinval
  322. begincompare:
  323. ; here we know that scan and match begin the same
  324. add edi,6
  325. add esi,6
  326. mov ecx,(MAX_MATCH-(2+4))/4 ; dword count: compare from offset 6 up to offset MAX_MATCH
  327. repe cmpsd ; loop until mismatch
  328. je trfin ; no mismatch found in the whole range
  329. ; we determine the mismatching byte
  330. sub esi,4 ; esi = address in scan of the dword that differed
  331. mov edx,[edi-4]
  332. xor edx,[esi]
  333. or dl,dl
  334. jnz trfin
  335. inc esi
  336. or dx,dx
  337. jnz trfin
  338. inc esi
  339. and edx,0ffffffh
  340. jnz trfin
  341. inc esi
  342. trfin:
  343. sub esi,scan ; esi = len
  344. trfinval:
  345. ; here we have finished the compare, and esi contains the length of the equal string
  346. cmp esi,best_len ; if len > best_len, go newbestlen
  347. ja short newbestlen
  348. ; now we restore edx, ecx and esi, for the big loop
  349. mov esi,prev
  350. mov ecx,limit
  351. mov edx,window
  352. jmp contloop3
  353. newbestlen:
  354. mov best_len,esi ; len becomes best_len
  355. mov match_start,eax ; save new position as match_start
  356. cmp esi,nice_match ; if best_len >= nice_match, exit
  357. jae exitloop
  358. mov ecx,scan
  359. mov edx,window ; restore edx=window
  360. add ecx,esi
  361. add esi,edx
  362. dec esi
  363. mov windowlen,esi ; windowlen = window + best_len-1
  364. mov bx,[ecx-1] ; bx = *(scan+best_len-1) = scan_end
  365. ; now we restore ecx and esi, for the big loop :
  366. mov esi,prev
  367. mov ecx,limit
  368. jmp contloop3
  369. exitloop:
  370. ; exit : s->match_start=match_start
  371. mov ebx,match_start
  372. mov ebp,str_s ; reload s from the stack argument
  373. mov ecx,best_len
  374. mov dword ptr [ebp+dep_match_start],ebx
  375. mov eax,dword ptr [ebp+dep_lookahead]
  376. cmp ecx,eax
  377. ja minexlo ; best_len > lookahead: return lookahead
  378. mov eax,ecx ; otherwise return best_len
  379. minexlo:
  380. ; return min(best_len,s->lookahead)
  381. ; restore stack and register ebx,esi,edi,ebp
  382. add esp,NbStackAdd
  383. pop ebx
  384. pop esi
  385. pop edi
  386. pop ebp
  387. ret
  ; The bytes below are data placed after the ret; they are never executed.
  388. InfoAuthor:
  389. ; please don't remove this string !
  390. ; Your are free use gvmat32 in any fre or commercial apps if you don't remove the string in the binary!
  391. db 0dh,0ah,"GVMat32 optimised assembly code written 1996-98 by Gilles Vollant",0dh,0ah
  392. IFDEF NOUNDERLINE
  393. longest_match_7fff endp
  394. ELSE
  395. _longest_match_7fff endp
  396. ENDIF
  ; -----------------------------------------------------------------------
  ; cpudetect32 -- classify the processor
  ; In:    nothing
  ; Out:   eax = 0300h  the AC bit of EFLAGS cannot be toggled (80386)
  ;        eax = 0400h  AC toggles but the ID bit does not (no CPUID)
  ;        eax = value left in EAX by CPUID with EAX=1 otherwise
  ; Saved: ebx. CPUID overwrites EAX, EBX, ECX and EDX; ebx is a register
  ;        the caller expects back unchanged (longest_match_7fff saves and
  ;        restores it too), so it is pushed on entry and popped on every
  ;        exit path.
  ; Uses:  ecx, edx, flags
  ; -----------------------------------------------------------------------
  IFDEF NOUNDERLINE
  cpudetect32 proc near
  ELSE
  _cpudetect32 proc near
  ENDIF
          push    ebx                     ; CPUID below overwrites ebx

          ; --- 80386 test: can the AC bit (40000h) of EFLAGS be changed? ---
          pushfd                          ; push original EFLAGS
          pop     eax                     ; get original EFLAGS
          mov     ecx, eax                ; save original EFLAGS
          xor     eax, 40000h             ; flip AC bit in EFLAGS
          push    eax                     ; save new EFLAGS value on stack
          popfd                           ; replace current EFLAGS value
          pushfd                          ; get new EFLAGS
          pop     eax                     ; store new EFLAGS in EAX
          xor     eax, ecx                ; can't toggle AC bit, processor=80386
          jz      end_cpu_is_386          ; jump if 80386 processor
          push    ecx
          popfd                           ; restore AC bit in EFLAGS first

          ; --- CPUID test: can the ID bit (200000h) of EFLAGS be changed? ---
          pushfd                          ; copy kept on the stack for the final popfd
          pushfd
          pop     ecx
          mov     eax, ecx                ; get original EFLAGS
          xor     eax, 200000h            ; flip ID bit in EFLAGS
          push    eax                     ; save new EFLAGS value on stack
          popfd                           ; replace current EFLAGS value
          pushfd                          ; get new EFLAGS
          pop     eax                     ; store new EFLAGS in EAX
          popfd                           ; restore original EFLAGS
          xor     eax, ecx                ; can't toggle ID bit,
          je      is_old_486              ; processor=old

          mov     eax, 1
          db      0fh,0a2h                ; CPUID
  exitcpudetect:
          pop     ebx                     ; give the caller its ebx back
          ret

  end_cpu_is_386:
          mov     eax, 0300h
          jmp     exitcpudetect

  is_old_486:
          mov     eax, 0400h
          jmp     exitcpudetect
  IFDEF NOUNDERLINE
  cpudetect32 endp
  ELSE
  _cpudetect32 endp
  ENDIF
  441. _TEXT ends ; close the code segment opened by "_TEXT segment"
  442. end ; end of the assembly module