; Metropoli BBS
; VIEWER: lzw.asm MODE: TEXT (ASCII)
;LZW compression v1.00

; by : Peter Quiring

; NOTE : This is a unique twist to LZW.  This will not work on LZW or LHA
; files. (in fact I have no idea exactly what the LZW compress is in them)

;256 = clear code
;257 = EOF
;258 = first avail. code

include qlib.inc
include lzw.inc

CLR_CODE equ 256
EOF_CODE equ 257
FIRST_CODE equ 258

; global data for (de)compressor
; for a good explanation of how to use this see "lzw.txt"
; for a good explanation of how LZW compression works see my tutorials
;   on my homepage

.data?
  ;string table
  public lzw_ht   ;in case you wanna dump it
  lzw_ht label byte
  ht dd 4096 dup (?)     ;string table, entry 0 = code FIRST_CODE (16k-ouch!) (dwords for faster comparison)
    ;each entry is (suffix shl 16) or prefix, so in memory it reads
    ;  [prefix dw][suffix db][empty db]
    ;the compressor finds a (prefix,suffix) pair by scanning the table with
    ;  REPNZ SCASD; the decompressor indexes it by code-FIRST_CODE
  ;code bookkeeping
  code dd ?              ;next free code (FIRST_CODE + number of entries in ht)
  s dw ?                 ;string (prefix code)
  nb db ?                ;number of bits per code written/read (starts at 9)
  ;bit stream position and scratch
  bo db ?                ;bit offset of output/input inside the current byte (7=top bit)
  b db ?                 ;byte   (suffix code)
  oc dw ?                ;old code (decompression: code read before nc)
  nc dw ?                ;new code (decompression: code just read)
  bad_lzw db ?           ;non-zero if the LZW compressed data is faulty
  dlen dd ?              ;used in decompression: bytes written so far
  tlen dd ?              ;used in decompression: counts down from the length stored in the header

.code
;---------------------------------------------------------------------------
; coutput - append the low NB bits of D to the output bit stream
;   in : d   = code to write (only its low NB bits are sent)
;        edi = byte currently being filled
;        bo  = next free bit inside that byte (7 = top bit ... 0)
;   out: edi and bo moved past the bits just written
;   Bits go out most significant first.  Set bits are ORed into [edi], so the
;   byte must still be clear where nothing has been written; every new byte
;   is zeroed here at the moment the stream moves onto it.
;   Uses ax, cx and dl.
;---------------------------------------------------------------------------
coutput proc private,d:word
  mov cl,16
  sub cl,nb
  mov ax,d
  shl ax,cl              ;left-justify: the first bit to send is now bit 15

  mov ch,nb              ;ch = bits still to write
  mov cl,bo              ;cl = bit position inside [edi]
co_bit:
  rcl ax,1               ;CF = next bit of the code
  jnc co_step            ;a zero bit needs no store, the byte is pre-cleared
  mov dl,1
  shl dl,cl
  or [edi],dl
co_step:
  dec cl
  cmp cl,255             ;stepped below bit 0?
  jne co_more
  inc edi                ;yes: continue at the top bit of a fresh, cleared byte
  mov [edi],bptr 0
  mov cl,7
co_more:
  dec ch
  jnz co_bit
  mov bo,cl              ;remember where the next code starts
  ret
coutput endp

getbyte macro
  ;fetch the next input byte from [esi] into eax (zero extended) and count it
  ;off LEN, which is the length parameter of the procedure the macro is
  ;expanded in
  lodsb
  movzx eax,al
  dec len
endm

;---------------------------------------------------------------------------
; lzw_compress - compress LEN bytes at SRC into DEST
;   dest = output buffer, src = input data, len = number of input bytes
;   returns eax = number of bytes written to dest; all other registers are
;   restored by POPAD
;
;   Output layout: one dword holding LEN, then the codes packed most
;   significant bit first by coutput, ended by EOF_CODE.
;   Codes start at 9 bits.  One more bit is used from the code after the one
;   that made CODE reach 200h, 400h and 800h.  When CODE reaches 1000h the
;   pending byte and CLR_CODE are sent and everything starts over at 9 bits.
;   The last one or two codes of the data are flushed without adding a table
;   entry or advancing CODE.
;   No output size limit is checked here.  LEN is used as the countdown of
;   unread input bytes (getbyte decrements it).
;---------------------------------------------------------------------------
lzw_compress proc,dest:dword,src:dword,len:dword
  pushad
  mov edi,dest
  mov esi,src
  mov eax,len
  stosd               ;header: uncompressed length (lzw_decompress loads it first)
  mov dptr[edi],0     ;coutput only ORs bits in, so start from cleared bytes
  mov bo,7            ;current bit offset
restart:              ;start of a block: empty table, 9 bit codes
  mov nb,9            ;# bits (9)
  mov code,FIRST_CODE ;1st avail code
  .if !len
    jmp codone        ;no input at all
  .endif
  getbyte
  mov s,ax
  .if !len
    callp coutput,ax  ;that was the only byte left: send it as a literal
    jmp codone
  .endif
  callp coutput,ax    ;the first byte of a block always goes out as a literal
  getbyte
  mov b,al
  .if !len
    callp coutput,ax  ;the second byte was the last one: literal as well
    jmp codone
  .endif
  ;first table entry = (first byte, second byte)
  xor eax,eax
  mov ah,b
  shl eax,8           ;suffix ends up in bits 16-23
  mov ax,s            ;prefix in bits 0-15
  mov dptr[ht],eax
  inc code  ;move to next avail code
do1: ;repeat until code > 12 bits & restart here for larger repeat
    ;the suffix of the string just handled becomes the start of the next one
    xor ah,ah
    mov al,b
    mov s,ax   ;s=b
    getbyte
    mov b,al
    .if !len
      ;b is the last input byte: flush s and b as two separate codes
      callp coutput,s
      callp coutput,b
      jmp codone
    .endif
do2: ;repeat until we find a new string
    mov ecx,code
    mov edx,ecx
    sub ecx,FIRST_CODE  ;ecx=# of entries in hash table
    push edi            ;edi is the output pointer, SCASD needs it for the scan
    mov edi,offset ht
    xor eax,eax
    mov ah,b
    shl eax,8
    mov ax,s            ;eax = (b shl 16) or s, same layout as a table entry
    repnz scasd         ;linear search for the pair (s,b)
    pop edi             ;POP leaves the flags from SCASD intact
    jnz c1
  ;not a new string (keep looking)
    ;ecx = entries left unscanned behind the match, edx = code, so
    ;edx-(ecx+1) = FIRST_CODE + index of the matching entry
    inc cx             ;move back to code that was the same
    sub dx,cx
    mov s,dx
    getbyte
    mov b,al
    .if !len
      ;input ended while extending the string: flush s and b
      callp coutput,s 
      callp coutput,b
      jmp codone
    .endif
    jmp do2
c1:  
    ;got a new string
    ;place s,b into hash table  
    mov ebx,code
    sub ebx,FIRST_CODE
    xor eax,eax
    mov ah,b
    shl eax,8
    mov ax,s
    shl ebx,2 ;*4
    mov [ebx+ht],eax
    callp coutput,s     ;send the longest string that was already known
    inc code
    ;the code just sent still used the old width; a wider code starts with
    ;the next one
    cmp code,200h   ;10bit
    jnz @f
    inc nb
    jmp do1
@@:
    cmp code,400h   ;11bit
    jnz @f
    inc nb
    jmp do1
@@:
    cmp code,800h   ;12bit
    jnz @f
    inc nb
    jmp do1
@@:
    cmp code,1000h  ;13bit = restart
    jnz do1
  ; end of this block  (send reset code and restart hash table)
    ;b has been read but not sent yet, so it goes out as a literal first;
    ;at least one more input byte is left here (len was checked after b was read)
    callp coutput,b
    callp coutput,CLR_CODE
    jmp restart
codone:  ;done!
  callp coutput,EOF_CODE
  .if bo!=7
    inc edi ;to get last part
  .endif
  sub edi,dest
  ;PUSHAD stored EAX last but one... its slot is the 8th dword from ESP
  mov [esp+4*7],edi  ;save EAX for ret val (size of compressed data)
  popad
  ret
lzw_compress endp

;---------------------------------------------------------------------------
; getcode - read the next NB-bit code from the input bit stream
;   in : esi = byte being read
;        bo  = next unread bit inside that byte (7 = top bit ... 0)
;   out: eax = code, zero extended; esi and bo moved past it
;   Bits are taken most significant first.  The following byte is fetched as
;   soon as the current one is used up, so a code that ends exactly on a byte
;   boundary still loads one byte beyond itself.
;   Uses bl and cx.
;---------------------------------------------------------------------------
getcode proc private
  xor eax,eax            ;the code is assembled in ax
  mov bl,[esi]
  mov cl,7
  sub cl,bo
  shl bl,cl              ;drop the bits already consumed: next one is bit 7
  mov ch,nb              ;ch = bits still to read
  mov cl,bo              ;cl = position of the next bit inside [esi]
gc_bit:
  rcl bl,1               ;CF = next bit of the stream
  rcl ax,1               ;shift it into the code
  dec cl
  cmp cl,255             ;stepped below bit 0?
  jne gc_more
  inc esi                ;yes: go on with the top bit of the following byte
  mov bl,[esi]
  mov cl,7
gc_more:
  dec ch
  jnz gc_bit
  mov bo,cl              ;remember where the next code starts
  ret
getcode endp

;---------------------------------------------------------------------------
; doutput - expand the code in NC into the output buffer
;   in : nc   = code obtained from the input stream
;        oc   = code read before it
;        code = next table slot that will be filled (first undefined code)
;        edi  = output position
;        tlen = number of bytes the output may still take (must be > 0)
;   out: al   = first byte of the expanded string; the caller stores it as
;               the suffix of the new table entry
;        edi, dlen and tlen updated for every byte written
;        bad_lzw is incremented (and al is meaningless) if the data is corrupt
;   Uses eax, ebx and ecx.
;
;   Walking the table yields the string last byte first, so the bytes are
;   pushed one at a time below the current stack top and then copied out in
;   the right order.
;   NC may equal CODE: the string is then the string of OC followed by its
;   own first byte (the entry is only created after this call).
;   Codes above CODE, and 256/257 inside a chain, are rejected because their
;   table entries are not valid.
;---------------------------------------------------------------------------
doutput proc private   ;output string and return 1st byte

  local flg:byte,fb:byte

  mov flg,0    ;set once NC turned out to be the not yet defined code
  mov ebx,esp  ;save stack pos
  xor eax,eax
  mov ax,nc    ;to be outputed
  .if eax>code
    jmp bad    ;refers to an entry that does not exist yet
  .endif
@@:
  .if ax<100h
    ;a literal ends the chain: it is the first byte of the string
    mov fb,al
    dec esp
    mov [esp],al
    jmp done
  .endif
  .if ax<FIRST_CODE
    jmp bad    ;CLR_CODE/EOF_CODE have no table entry
  .endif
  .if eax==code
    .if flg
      jmp bad  ;this can only happen once (more than once = corrupt data)
    .endif
    inc flg
    mov ax,oc  ;expand the previous code instead, its first byte is added below
    jmp @b
  .endif
;push the suffix and continue with the prefix
  xor ecx,ecx
  mov cx,ax
  sub cx,FIRST_CODE
  shl ecx,2
  add ecx,offset ht
  mov al,[ecx+2]      ;get suffix
  dec esp
  mov [esp],al
  mov ax,[ecx]        ;get prefix (upper half of eax is still zero)
  jmp @b
done:
  ;undo stack: the byte pushed last is the first byte of the string
@@:
  mov al,[esp]
  stosb
  inc esp
  inc dlen
  dec tlen
  .if esp!=ebx
    .if !tlen
      jmp bad         ;string is longer than the room that is left
    .endif
    jmp @b
  .endif
  mov al,fb
  .if flg
    ;output fb again, but only if there is still room for it
    .if !tlen
      jmp bad
    .endif
    stosb
    inc dlen
    dec tlen
  .endif
  ret
bad:
  mov esp,ebx         ;throw away whatever is still pushed
  inc bad_lzw
  ret
doutput endp

;---------------------------------------------------------------------------
; lzw_decompress - expand a buffer written by lzw_compress
;   dest = output buffer
;   src  = compressed data: one dword with the uncompressed length, then the
;          code stream
;   returns eax = number of bytes written, or ERROR if the stream is corrupt;
;   all other registers are restored by POPAD
;
;   Keeping in step with lzw_compress:
;   - The compressor creates its first table entry right after the first
;     literal of a block, the decoder only after the second code.  CODE here
;     is therefore always one less than the compressor's counter, and the
;     code width has to grow when CODE reaches 511/1023/2047 (the compressor
;     grows at 200h/400h/800h).
;   - The compressor flushes its last one or two codes without advancing its
;     counter.  Those are exactly the codes after which fewer than two bytes
;     remain to be produced, so CODE and the width are only advanced while
;     TLEN is at least 2.
;   - When the compressor's counter reaches 1000h it sends one literal and
;     then CLR_CODE.  That literal brings CODE to 4096 here (or leaves fewer
;     than two bytes, in which case the CLR_CODE is picked up by the normal
;     loop).
;   TLEN counts down from the stored length for every byte written; needing
;   more room than that before EOF_CODE is treated as corruption.
;---------------------------------------------------------------------------
lzw_decompress proc,dest:dword,src:dword

  pushad
  mov bo,7               ;the stream starts at the top bit of its first byte
  mov esi,src
  mov edi,dest
  lodsd                  ;header = uncompressed length
  mov tlen,eax
  mov dlen,0
  mov bad_lzw,0
restart:                 ;start of a block: 9 bit codes, empty table
  mov nb,9
  mov code,FIRST_CODE
  call getcode
  mov oc,ax
  .if ax==EOF_CODE
    jmp dedone
  .endif
  .if !tlen
    jmp bad              ;more data than the header announced
  .endif
  .if ax>=100h
    jmp bad              ;a block has to open with a literal byte
  .endif
  stosb
  inc dlen
  dec tlen               ;the opening literal counts against the length too
dc1:  ;repeat until we get code EOF CODE
    .if bad_lzw
      jmp bad
    .endif
    call getcode
    .if ax==EOF_CODE
      jmp dedone
    .endif
    .if !tlen
      jmp bad            ;more data than the header announced
    .endif
    .if ax==CLR_CODE
      jmp restart
    .endif
    .if eax>code
      jmp bad            ;code that cannot have been defined yet
    .endif
    mov nc,ax
    call doutput         ;writes the string, al = its first byte
    .if bad_lzw
      jmp bad
    .endif
    ;new entry = (string of the previous code) + (first byte of this one)
    mov b,al
    mov ebx,code
    sub ebx,FIRST_CODE
    shl ebx,2
    xor eax,eax
    mov ah,b
    shl eax,8
    mov ax,oc
    mov [ebx+ht],eax
    mov ax,nc       ;oc=nc;
    mov oc,ax
    .if tlen<2
      ;the compressor was flushing its final codes here and did not advance
      ;its counter, so neither CODE nor the width may change
      jmp dc1
    .endif
    inc code
    .if code==511 || code==1023 || code==2047
      inc nb             ;compressor's counter just reached 200h/400h/800h
    .elseif code==4096
      ;table is full: the compressor always follows up with CLR_CODE
      call getcode
      .if ax==CLR_CODE
        jmp restart
      .endif
      jmp bad
    .endif
    jmp dc1
dedone:
  popad
  mov eax,dlen
  ret
bad:
  popad
  mov eax,ERROR
  ret
lzw_decompress endp

end

[ RETURN TO DIRECTORY ]