; Metropoli BBS
; VIEWER: clean.asm MODE: TEXT (ASCII)
        title   CLEAN - Text file filter 
        page    55,132
;
; CLEAN.ASM     Filter to turn document files into
;               normal text files.
;
; Copyright 1988 Ray Duncan
;
; Build:        C>MASM CLEAN;
;               C>LINK CLEAN;
;                       
; Usage:        C>CLEAN  <infile  >outfile
;
; All text characters are passed through with high 
; bit stripped off.  Form feeds, carriage returns, 
; and line feeds are passed through.  Tabs are expanded 
; to spaces.  All other control codes are discarded.
;

tab     equ     09h             ; ASCII tab code
lf      equ     0ah             ; ASCII line feed
ff      equ     0ch             ; ASCII form feed
cr      equ     0dh             ; ASCII carriage return
blank   equ     020h            ; ASCII space code
eof     equ     01ah            ; Ctrl-Z End-of-file 

tabsiz  equ     8               ; width of tab stop

bufsiz  equ     128             ; size of input and
                                ; output buffers

stdin   equ     0000            ; standard input handle
stdout  equ     0001            ; standard output handle
stderr  equ     0002            ; standard error handle


; Code segment: contains the 'clean' entry point and the near
; helper procedures it calls (getc, putc, init, flush).
_TEXT   segment word public 'CODE'

                                ; tell the assembler which segments
                                ; the segment registers address;
                                ; DS and ES are actually loaded with
                                ; _DATA at the start of 'clean'
        assume  cs:_TEXT,ds:_DATA,es:_DATA,ss:STACK 

clean   proc    far             ; entry point from MS-DOS.
                                ;
                                ; Copies standard input to standard
                                ; output: high bit stripped from
                                ; every character, tabs expanded
                                ; to spaces, CR/LF/FF passed
                                ; through, other control codes
                                ; dropped.  Stops at Ctrl-Z or at
                                ; end of input.
                                ;
                                ; Exit: return code 0 if all output
                                ;       was written, 1 (after a
                                ;       message on stderr) if any
                                ;       write failed or was short.
                                ;       Under MS-DOS 1.x prints a
                                ;       message and returns via
                                ;       the far RET to PSP:0000.

        push    ds              ; save DS:0000 for final
        xor     ax,ax           ; return to MS-DOS, in case
        push    ax              ; Function 4CH can't be used.   

        mov     ax,_DATA        ; make data segment addressable
        mov     ds,ax
        mov     es,ax

        mov     ah,30h          ; check version of MS-DOS
        int     21h
        cmp     al,2            ; MS-DOS 2.0 or later?
        jae     clean1          ; jump if version OK

                                ; MS-DOS 1.x, display error
                                ; message and exit...
        mov     dx,offset msg1  ; DS:DX = message address
        mov     ah,9            ; Fxn 9 = display string
        int     21h             ; transfer to MS-DOS
        ret                     ; far return to PSP:0000
                                ; pushed above (the old way)

clean1: call    init            ; initialize input buffer

clean2: call    getc            ; get character from input
        jc      clean9          ; exit if end of stream

        and     al,07fh         ; strip off high bit

        cmp     al,blank        ; is it a control char?
        jae     clean4          ; no, write it

        cmp     al,eof          ; is it end of file?
        je      clean8          ; yes, write EOF and exit

        cmp     al,tab          ; is it a tab?
        je      clean6          ; yes, expand it to spaces

        cmp     al,cr           ; is it a carriage return?
        je      clean3          ; yes, go process it.

        cmp     al,lf           ; is it a line feed?
        je      clean3          ; yes, go process it.

        cmp     al,ff           ; is it a form feed?
        jne     clean2          ; no, discard it  

clean3: mov     column,0        ; if CR, LF, or FF
        jmp     clean5          ; reset column to zero

clean4: inc     column          ; if non-control char.
                                ; increment column counter
                                
clean5: call    putc            ; write char. to stdout
        jnc     clean2          ; if disk not full,
                                ; get another character

clean_err:                      ; a write failed or was short;
                                ; every output error path in
                                ; this procedure ends up here
        mov     dx,offset msg2  ; DS:DX = error message
        mov     cx,msg2_len     ; CX = message length
        mov     bx,stderr       ; BX = standard error handle
        mov     ah,40h          ; Fxn 40H = write
        int     21h             ; transfer to MS-DOS

        mov     ax,4c01h        ; Fxn 4CH = terminate
                                ; return code = 1
        int     21h             ; transfer to MS-DOS

clean6: mov     ax,column       ; tab code detected
        xor     dx,dx           ; tabsiz - (column MOD tabsiz)
        mov     cx,tabsiz       ; is number of spaces needed
        div     cx              ; to move to next tab stop.
        sub     cx,dx           ; Column is an unsigned count,
                                ; so divide unsigned: a signed
                                ; divide would give a negative
                                ; remainder (and too many
                                ; spaces) once column > 32767.

        add     column,cx       ; also update column counter

clean7: push    cx              ; save spaces counter

        mov     al,blank        ; write an ASCII space
        call    putc

        pop     cx              ; restore spaces counter
                                ; (POP leaves Carry intact)
        jc      clean_err       ; stop if the write failed
        loop    clean7          ; loop until tab stop 

        jmp     clean2          ; get another character
                                
clean8: call    putc            ; write EOF mark
        jc      clean_err       ; stop if the write failed

clean9: mov     cx,optr         ; anything left in the
        jcxz    clean10         ; output buffer?  Skip the
                                ; flush if not, so the status
                                ; tested below always comes
                                ; from an actual write.
        call    flush           ; write last output buffer
        jc      clean_err       ; MS-DOS reported an error
        cmp     ax,optr         ; AX = bytes actually written
        jne     clean_err       ; short write: disk is full

clean10:
        mov     ax,4c00h        ; Fxn 4CH = terminate
                                ; return code =0
        int     21h             ; transfer to MS-DOS

clean   endp


getc    proc    near            ; fetch next byte of stdin
                                ; through the buffer 'ibuff'
                                ;
                                ; Out: Carry = 0, AL = byte
                                ;      Carry = 1 if the read
                                ;      failed or no more input
                                ; Uses AX, BX, CX, DX

        mov     bx,iptr         ; BX = index of next byte
        cmp     bx,ilen         ; all buffered bytes used?
        je      getc_fill       ; yes, go refill the buffer

getc_take:
        mov     al,ibuff[bx]    ; AL = next buffered byte
        inc     bx              ; step past it
        mov     iptr,bx         ; and remember the position
        clc                     ; Carry = 0: AL is valid
        ret

getc_fill:
        mov     ah,3fh          ; Fxn 3FH = read
        mov     bx,stdin        ; BX = standard input handle
        mov     cx,bufsiz       ; CX = bytes requested
        mov     dx,offset ibuff ; DS:DX = buffer address
        int     21h             ; transfer to MS-DOS
        jc      getc_end        ; read error, report end

        test    ax,ax           ; zero bytes returned?
        jz      getc_end        ; yes, input is exhausted

        mov     ilen,ax         ; record bytes now buffered
        xor     bx,bx           ; restart at front of buffer
        jmp     getc_take       ; and hand out first byte

getc_end:
        stc                     ; Carry = 1: no character
        ret

getc    endp


putc    proc    near            ; append AL to the output
                                ; buffer 'obuff'; when the
                                ; buffer fills, write it to
                                ; stdout
                                ;
                                ; In:  AL = byte to send
                                ; Out: Carry = 0 if accepted
                                ;      Carry = 1 if the write
                                ;      failed or was incomplete
                                ; Uses AX, BX, CX, DX

        mov     bx,optr         ; BX = next free slot
        mov     obuff[bx],al    ; deposit the byte there
        inc     bx              ; count it
        cmp     bx,bufsiz       ; has the buffer filled up?
        je      putc_write      ; yes, go write it out

putc_keep:
        mov     optr,bx         ; remember new fill level
        clc                     ; Carry = 0: success
        ret

putc_write:
        mov     ah,40h          ; Fxn 40H = write
        mov     bx,stdout       ; BX = standard output handle
        mov     cx,bufsiz       ; CX = whole buffer
        mov     dx,offset obuff ; DS:DX = buffer address
        int     21h             ; transfer to MS-DOS
        jc      putc_fail       ; MS-DOS reported an error

        cmp     ax,cx           ; all bytes written?
        jne     putc_fail       ; no, treat as disk full

        xor     bx,bx           ; buffer is empty again
        jmp     putc_keep       ; store that and return OK

putc_fail:
        stc                     ; Carry = 1: write problem
        ret                     ; (optr is left unchanged)

putc    endp


init    proc    near            ; preload 'ibuff' with the
                                ; first block from stdin
                                ;
                                ; Out: ilen = bytes read when
                                ;      the read succeeds;
                                ;      ilen untouched if not
                                ; Uses AX, BX, CX, DX

        mov     ah,3fh          ; Fxn 3FH = read
        mov     dx,offset ibuff ; DS:DX = buffer address
        mov     cx,bufsiz       ; CX = bytes requested
        mov     bx,stdin        ; BX = standard input handle
        int     21h             ; transfer to MS-DOS
        jnc     init_ok         ; jump if the read worked
        ret                     ; read failed, nothing stored

init_ok:
        mov     ilen,ax         ; record bytes now buffered
        ret

init    endp


flush   proc    near            ; flush output buffer:
                                ; write the 'optr' bytes
                                ; pending in 'obuff' to
                                ; stdout
                                ;
                                ; Out: Carry = 0 if nothing was
                                ;      pending or every byte
                                ;      was written
                                ;      Carry = 1 if MS-DOS
                                ;      reported an error or the
                                ;      write was incomplete
                                ;      (same convention as putc)
                                ;      After a write, AX holds
                                ;      the MS-DOS result and
                                ;      CX the byte count asked.
                                ; Uses AX, BX, CX, DX
                                ; 'optr' is not modified.

        mov     cx,optr         ; CX = bytes to write
        jcxz    flush1          ; nothing pending: success
        mov     dx,offset obuff ; DS:DX = buffer address
        mov     bx,stdout       ; BX = standard output handle
        mov     ah,40h          ; Fxn 40H = write
        int     21h             ; transfer to MS-DOS
        jc      flush2          ; write failed, Carry is set

        cmp     ax,cx           ; was write complete?
        jne     flush3          ; no, report disk full

flush1: clc                     ; success, Carry = 0 (clear);
                                ; explicit because the entry
                                ; flags are unknown when the
                                ; buffer was empty
flush2: ret

flush3: stc                     ; short write,
        ret                     ; return Carry = 1 (set)

flush   endp

_TEXT   ends


_DATA   segment word public 'DATA'

                                ; buffer bookkeeping
iptr    dw      0               ; index of next unread byte
                                ; in ibuff
ilen    dw      0               ; number of valid bytes
                                ; currently in ibuff
optr    dw      0               ; number of bytes pending
                                ; in obuff

column  dw      0               ; output column, used to
                                ; find the next tab stop

ibuff   db      bufsiz dup (0)  ; data read from stdin
obuff   db      bufsiz dup (0)  ; data waiting for stdout

                                ; '$'-terminated text for
                                ; Fxn 9 (display string)
msg1    db      cr,lf,'clean: need MS-DOS version 2 or greater.',cr,lf,'$'

                                ; counted text for Fxn 40H
                                ; (write), length in msg2_len
msg2    db      cr,lf,'clean: disk is full.',cr,lf
msg2_len equ    $-msg2

_DATA   ends


STACK   segment para stack 'STACK'

        db      128 dup (?)     ; 128 bytes (64 words) of
                                ; uninitialized stack space

STACK   ends

        end     clean
; [ RETURN TO DIRECTORY ]