MS-DOS/v2.0/source/FIND.ASM

932 lines
38 KiB
NASM
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

title MSDOS V2.0 FIND
;--------------------------------------------------------------------;
; Revision History: ;
; ;
; V1.1 8/23/82 M.A.Ulloa ;
; ;
; V1.2 9/22/82 M.A.Ulloa ;
; Added the -c and -n options ;
; ;
; 9/23/82 M.A.Ulloa ;
; Added DOS version number control ;
; ;
; 10/07/82 Rev.2 M.A.Ulloa ;
; Changed quote for double quotes, and added ;
; file name printing ;
; ;
; 10/20/82 Rev.3 M.A.Ulloa ;
; Modified IBM name to FIND, and changed the text ;
; of some messages. ;
; ;
; 10/25/82 Rev.4 M.A.Ulloa ;
; Changed name to FIND and all messages to the ;
; IBM form. ;
; ;
; 10/27/82 Rev.5 M.A.Ulloa ;
; Made the correct exit on version check in case ;
; of a 1.x DOS. ;
; ;
; 11/4/82 Rev. 5 A.R. Reynolds ;
; Messages moved to external module ;
; ;
; 11/10/82 Rev. 6 M.A. Ulloa ;
; Corrected problem with line numbers, and a problem ;
; with seeking for 0 chars. ;
; ;
; 03/30/83 Rev. 7 M.A. Ulloa ;
; Added patch area for bug fixing. ;
; ;
; 04/14/83 Rev. 8 M.A. Ulloa ;
; Made changes for Kanji characters. (uhg!) ;
; ;
;--------------------------------------------------------------------;
;----- ASSEMBLY-TIME SWITCHES ---------------------------------------;
; KANJI selects, through the IF KANJI blocks further down, between the
;double-byte aware matcher and the plain single-byte matcher.
FALSE equ 0
TRUE equ NOT FALSE
KANJI equ FALSE ;set to TRUE to assemble the Kanji version
;--------------------------------------------------------------------;
; FIND program following the standard UNIX operation. ;
; ;
; FORMAT: ;
; find {option} string {filename {filename} {...}} ;
; ;
; NOTES: ;
; 1) String arguments HAVE to be enclosed ;
; in double quotes. (Two double quotes if a ;
; double quote is to be included). Only ONE ;
; string argument is presently allowed. ;
; ;
; 2) Options are available: ;
; v All lines but those matching are considered ;
; c Only print a count of matching lines ;
; n Each line is preceded by its relative ;
; line number in the file. ;
; ;
; - Options can be Upper or lower case. ;
; - Format: The switch character followed by an options ;
; character. I.e.: In the IBM PC: /v ;
; ;
; 3) The program returns: ;
; 0 - OK, and some matches ;
; 1 - The string argument was empty ;
; 2 - Some Error ;
; ;
; 4) The maximum line size is determined by ;
; buffer size. Bigger lines will bomb the program. ;
; ;
; 5) If no file name is given then it will assume ;
; the input is coming from the Standard Input. NO ;
; errors are reported when reading from Standard Input. ;
;--------------------------------------------------------------------;
; Everything (code and data) lives in the single segment "code".
; DS and ES are declared unknown here; the code re-points them at
; run time and overrides them explicitly (ds:, es:, cs:) where needed.
code segment public
assume cs:code,ss:code,ds:nothing,es:nothing
CR equ 0dh ;A Carriage Return
LF equ 0ah ;A Line Feed
quote_char equ 22h ;A double quote character
buffer_size equ 4096 ;file buffer size (bytes requested per read)
st_buf_size equ 128 ;string arg. buffer size
fname_buf_size equ 64 ;file name buffer size (a null is added after it)
;----- DOS EQUATES --------------------------------------------------;
; The function numbers below are written in decimal; they are loaded
;into AH before "int dos_ent". Hex equivalents are given for reference.
std_in equ 0 ;STD input handle
std_out equ 1 ;STD output handle
std_err equ 2 ;STD error handle
dos_ent equ 21h ;DOS entry point
std_con_string_output equ 9 ;(09h) used to print the bad version message
get_version equ 48 ;(30h) version number comes back in AL
char_oper equ 55 ;(37h) get configuration parameters
open equ 61 ;(3Dh) DOS std open code
close equ 62 ;(3Eh) DOS std close code
read equ 63 ;(3Fh) DOS std read code
write equ 64 ;(40h) DOS std write code
lseek equ 66 ;(42h) DOS file seek
exit equ 76 ;(4Ch) DOS process exit code
;----- Misc Data -----------------------------------------------;
make db "***MAUlloa/Microsoft/V12***"
rev db "8"
colon db ": " ;printed by print_count ahead of the count
; n1_buf and n2_buf MUST stay adjacent: prt_lcntr converts the line
;number into n2_buf, appends "]" and then writes starting at n1_buf so
;that the "[" comes out in the same write.
n1_buf db "["
n2_buf db 8 dup(0) ;buffer for number conversion
;----- OPTION FLAGS -------------------------------------------------;
; If a flag is set (0ffh) then the option has been selected, if
;reset (0) then it has not. All options are reset initially.
; NOTE: the order of this table has to remain consistent with the
;options dispatch code (opt_v/opt_c/opt_n index it with 0, 1 and 2).
;If any changes are made they have to correspond with the code.
opt_tbl:
v_flg db 0 ;select the lines that do NOT match
c_flg db 0 ;only print a count of the selected lines
n_flg db 0 ;precede each printed line with its line number
x_flg db 0 ;not used
l_flg db 0 ;not used
;----- LINE COUNTERS ------------------------------------------------;
; Both counters are words and are reset by clr_cntrs for each file.
mtch_cntr dw 0 ;matched lines counter
line_cntr dw 0 ;line counter
;----- MAIN ROUTINE -------------------------------------------------;
start:
;----- CHECK VERSION NUMBER -----------------------------------------;
; Program entry point. Refuses to run unless DOS reports a version
; number of 2 or more in AL.
        mov     ah,get_version
        int     dos_ent
        cmp     al,2
        jnl     vers_ok                 ;2 or later: carry on
; Too old. Print bad_vers with the string output call and then leave
; the 1.x way: a far return to offset 0 of the segment held in ES.
        push    cs
        pop     ds                      ;DS:DX -> bad_vers
        mov     ah,std_con_string_output
        mov     dx,offset bad_vers
        int     dos_ent
        push    es                      ;far return address = ES:0000
        xor     ax,ax
        push    ax
badfart proc    far                     ;only here to make the RET a far one
        ret
badfart endp
vers_ok:
        push    cs                      ;ES = our own segment, for use
        pop     es                      ; with DI and the string stores
        assume  es:code
;----- LOOK AT THE COMMAND LINE -------------------------------------;
; Skip the leading blanks; a line holding nothing else is an error.
        mov     si,81h                  ;start address of the command line buffer
        call    kill_bl                 ;step over blanks
        test    bx,bx                   ;did kill_bl stop on a CR?
        jz      find_opt                ;no: go look for options first
args_missing:
        mov     cl,cs:errlen1           ;(CH is cleared inside prt_err)
        mov     dx,offset errmsg1       ;no arguments: error
        call    prt_err
        mov     al,2                    ;exit code for an error
        jmp     done
;----- FIND THE OPTION IF ANY ---------------------------------------;
; Scans the command line (through SI) for options introduced by the
;DOS switch character and sets the matching byte of opt_tbl to 0ffh.
;Leaves through find_str with the first non-option character in AL.
;
; Stack discipline: the switch character (DX) is kept on the stack
;whenever control is at another_opt or cont_scan. It is popped just
;before the compare against AL and pushed back only when an option
;follows, so the stack is clean again on the way to find_str.
find_opt:
mov ah,char_oper ;get the dos switch char.
mov al,0 ;request code 0
int dos_ent ;switch char in DL
push dx
another_opt:
lodsb ;get the next char of the command line
cmp al,' ' ;a blank?
je cont_scan
cmp al,CR ;a Carriage Return
je args_missing
pop dx ;get switch character
cmp al,dl ;is it the switch char?
jne find_str ;no, no options: get the string
push dx ;save for another round
lodsb ;get the option character
cmp al,' ' ;a blank?
je cont_scan ;yes, ignore and continue
cmp al,CR ;a CR?
je args_missing ;yes, error...
call make_caps ;Capitalize the character
mov bx,offset opt_tbl ;pointer to option flag table
cmp al,'V' ;the v option?
je opt_v
cmp al,'C' ;the c option?
je opt_c
cmp al,'N' ;the n option?
je opt_n
; Unknown option: report it on standard error and keep scanning.
mov cs:errmsg5_opt,al ;store the offending letter at errmsg5_opt
mov dx,offset errmsg5 ;unknown option: error
mov cl,cs:errlen5
call prt_err
mov dx,offset crlf ;print a CRLF
mov cx,2
call prt_err
jmp another_opt ;process next option
; DI = index of the flag inside opt_tbl (v=0, c=1, n=2).
opt_v:
mov di,0
jmp short opt_dispatch
opt_c:
mov di,1
jmp short opt_dispatch
opt_n:
mov di,2
opt_dispatch:
mov es:byte ptr[bx+di],0ffh ;set the corresponding flag
jmp another_opt ;process the rest of the options
cont_scan:
dec si ;back up onto the blank just read
call kill_bl ;get rid of blanks
or bx,bx ;A CR found?
jz another_opt ;no, test for other options
jmp args_missing ;yes, error...
;----- FIND STRING ARGUMENT -----------------------------------------;
; Copies the quoted string argument into st_buffer and stores its
;length in st_length. Entered with the first character of the argument
;already in AL (it must be the opening quote) and SI just past it.
;A pair of quotes inside the string stands for one literal quote.
;Exits: bad string -> message and exit code 2; empty string -> exit
;code 1; otherwise falls out to other_args.
find_str:
cmp al,quote_char ;string should start with a
jnz bad_str_err ; quote character, if not: error.
mov di,offset st_buffer ;String argument buffer address.
xor cx,cx ;Clear to keep string length.
move_str:
lodsb
cmp al,CR ;if a CR is found before the closing
jnz str_ok ; quote then it's a bad string
bad_str_err:
mov dx,offset errmsg2 ;bad string error message
mov cl,cs:errlen2
call prt_err ;print the error.
mov al,2 ;exit code for an error
jmp done
str_ok:
cmp al,quote_char ;look for a quote character
jnz move_char ;not a quote, move to buffer
lodsb ;a quote, check next char.
cmp al,quote_char ;another quote character?
je move_char ;yes, move one quote to the buffer
dec si ;no, it was the closing quote: un-read the char
mov es:st_length,cx ;store the string length
or cx,cx ;Is the string empty?
jnz other_args ;no: get the rest of the args.
mov al,1 ;empty: no matches(!?), exit code 1
jmp done
move_char:
stosb ;put in buffer (ES:DI)
inc cx ;increment string length
jmp move_str
;----- FIND THE FILE ARGUMENTS --------------------------------------;
; With no file name on the line the input is taken from standard
;input. Otherwise further_args is entered once per file name: it
;resets the counters, copies the name into file_name_buf, saves the
;command line position (SI, then DS) on the stack and opens the file.
other_args: ;Process the rest of the command
; line arguments.
call kill_bl ;get rid of leading blanks
or bx,bx ;At least one argument necessary,
jz further_args ; if a CR not found: ok.
;----- USE STD IN FOR INPUT -----------------------------------------;
; Nothing is pushed on this path, and clr_cntrs is not called.
push cs
pop ds ;DS = our segment from here on
mov ax,std_in ;handle
jmp fill
further_args:
call clr_cntrs ;set all counters to zero
mov di,offset file_name_buf ;Set pointer to the name buffer
xor cx,cx ;zero file name length
; NOTE(review): the copy loop has no check against fname_buf_size.
move_fname:
lodsb
cmp al,' ' ;A blank: end of file name,
je done_move
cmp al,CR ;A CR: idem.
je done_move
stosb ;store in name buffer
inc cx ;increment file name length
jmp move_fname
done_move:
dec si ;Adjust pointer for next round.
mov es:byte ptr[di],00h ;File names are null terminated
push si ;Save SI to continue com. line scan.
push ds ;Save DS register contents for
; later because it points to the
; rest of the arguments.
; (scan_rest pops them: DS, then SI)
mov es:file_name_len,cx ;save the name length
;----- OPEN FILE FOR READING ----------------------------------------;
push cs ;Load new DS with CS
pop ds
mov dx,offset file_name_buf ;address of the file name
mov ah,open
mov al,0 ;file open for reading
int dos_ent ;call the DOS
jnc say_name ;if no carry then no errors (handle in AX)
jmp open_error
;----- PRINT FILE NAME ----------------------------------------------;
; Writes the heading text followed by the file name to standard
; output. A CR/LF is added unless the count-only flag is set. The
; file handle arrives in AX and is handed on in AX; it is parked on
; the stack in between because prout changes AX.
say_name:
        push    ax                      ;park the file handle
        xor     ch,ch
        mov     cl,cs:heading_len       ;CX = length of the heading
        mov     dx,offset heading
        call    prout
        mov     cx,ds:file_name_len
        mov     dx,offset file_name_buf
        call    prout
        cmp     ds:c_flg,0ffh           ;count only flag set?
        je      xx1                     ;yes: no end of line here
        mov     cx,2
        mov     dx,offset crlf
        call    prout
xx1:
        pop     ax                      ;handle back in AX for fill
;----- Fill Buffer for Matching -------------------------------------;
; fill:   entered with the input handle in AX.
; refill: entered with the input handle in BX.
; Requests buffer_size bytes into buffer. Data read -> go_match with
; the byte count in AX. Nothing read -> end of input: print the count
; if asked for, then either exit (standard input) or close the file
; and look for the next file name.
fill:
        mov     bx,ax                   ;handle lives in BX from now on
refill:
        mov     ah,read
        mov     cx,buffer_size
        mov     dx,offset buffer        ;data buffer address
        int     dos_ent
        jnc     no_read_error
        jmp     read_error              ;carry set: read error
no_read_error:
        test    ax,ax                   ;AX = 0 means all done
        jnz     go_match
        cmp     ds:c_flg,0ffh           ;count only flag set?
        jne     sj2
        call    print_count
sj2:
        cmp     bx,std_in               ;reading standard input?
        jnz     regular
        jmp     foo                     ;yes: nothing to close, exit
regular:
        mov     ah,close                ;a real file: close it
        int     dos_ent
        jmp     scan_rest               ;and go get another file
;----- MATCH ROUTINE ------------------------------------------------;
; Entered from refill with AX = number of bytes just read into buffer
;and BX = file handle. Sets up the line scan and jumps to try_again.
;Note: If input is being taken from a file the stack contains
; (top of stack first):
; - The saved DS (segment of the command line, pushed last).
; - The saved SI (pointer to the next argument in the command line).
; If the input is from the standard input then NONE of it will be
; on the stack.
go_match:
push bx ;save the file handle
mov bp,offset buffer ;ptr to first line in the buffer
mov di,ax ;displacement of the end of the data
cmp ax,buffer_size-1 ;was the buffer filled completely?
jg no_last_line ;yes: leave the data as it is
; Short read: make sure the data ends with a line terminator so that
;the last line is seen as complete. With AX = buffer_size-1 the LF
;lands in the extra guard byte declared after the buffer.
mov bx,bp
cmp byte ptr[bx+di-1],LF ;finished with a LF?
je no_last_line ;yes, it's an OK line.
mov byte ptr[bx+di],CR ;put a CR at the end of the data
inc di
mov byte ptr[bx+di],LF ;put a LF ...
inc di
no_last_line:
push di ;save the # of chars. in the buffer
push bp ;save the address of the first line
mov dx,ds:st_length ;length of the string arg.
dec dx ;adjust for later use (length - 1)
jmp short try_again
; Stepping stone: the jcxz/jnz in try_again can only jump a short
;distance, so they come here first.
more_stuff_o:
jmp more_stuff
;----- SCAN LINES IN THE BUFFER FOR A MATCH -------------------------;
;Note: at this point the stack contains (top of stack first):
; - Addrs. of the next line in the buffer.
; - Number of chars. left in the buffer from that line on.
; - File Handle
; - The saved command line pointers (file input only).
;
; plus, DX has the adjusted length of the string argument.
;
; Isolates the next complete line (one that ends in a LF). If the
;buffer holds no complete line any more, control goes to more_stuff
;(through more_stuff_o) with BP -> the partial line and BX = number
;of chars left. Otherwise the position of the following line is
;pushed (count, then address), BX = length of this line including the
;CRLF, CX = length without it, DI = BP -> start of the line, and
;control falls into another_char.
;
; The line counter is advanced only AFTER a complete line has been
;found. Counting on entry would add one extra line every time the
;buffer has to be refilled, throwing off the numbers shown by the n
;option for files larger than the buffer.
try_again:
        pop     bp                      ;addrs. of next line in the buffer
        mov     di,bp                   ;points to beg. of a line
        pop     cx                      ;get # of chars left in the buffer
        mov     bx,cx                   ;save in case a non-complete line
        mov     al,LF                   ;search for a Line Feed
        jcxz    more_stuff_o            ;no chars left in buffer
        repnz   scasb
        jnz     more_stuff_o            ;no full line left in buffer
        inc     ds:line_cntr            ;a whole line: count it now
        push    cx                      ;save chars left in buffer
        push    di                      ;points to beg. of next line
        mov     cx,di
        sub     cx,bp                   ;length of the current line
        mov     bx,cx                   ;save in case it has a match
        dec     cx
        dec     cx                      ;CRLF characters discounted
        jcxz    try_again_opt           ;if line empty go to next line
        mov     di,bp                   ;pointer to the beg. of current line
another_char:
;
; Looks for the string argument inside the current line.
;
; On entry:
; BX line length (including the CRLF)
; CX adjusted line length (chars. still to be examined)
; DX adjusted string argument length (length - 1)
; DI points to beg. of line (or to where the search resumes)
;
; Leaves through try_again_opt when the line holds no match, or
; falls out at the bottom (v flag test) when it does.
;
IF KANJI
; Kanji version: walks the line one (possibly two byte) character at
; a time, using is_prefix and next_kchar. DX is kept on the stack
; while this runs.
push dx ;save for next line
lop:
pop dx
push dx
inc dx ;different algorithm! (uses the full length)
mov si,offset st_buffer ;pointer to beg. of string argument
comp_next_char:
push di
mov di,si
call is_prefix ;check the string arg. char for a prefix
pop di
jnc nopre
lodsw
cmp cx,1 ; Can not compare a two byte char
jz try_again_opt1 ; if there is only one available
cmp ax,word ptr [di]
jz kmatch1
call next_kchar ;no match, advance di to next kanji
jc try_again_opt1 ;not enough chars left in line
jmp short lop ;try another char in line
nopre:
lodsb
cmp al,byte ptr [di]
jz kmatch
call next_kchar ;no match, advance di to next kanji
jc try_again_opt1 ;not enough chars left in line
jmp short lop ;try another char in line
try_again_opt1:
pop dx ;drop the saved DX before leaving
jmp try_again_opt
kmatch1:
dec dx ;last char had prefix so it was
; long.
kmatch:
dec dx
jz a_matchk ; no chars left: a match!
call next_kchar
jc try_again_opt1
jmp comp_next_char ; loop if chars left in arg.
a_matchk:
pop dx
ELSE
; Plain version: scan for the first character of the string, then
; compare the remaining DX characters in place.
mov si,offset st_buffer ;pointer to beg. of string argument
lodsb ;get first character of the str. arg.
repnz scasb ;search for a match in current line
; (if CX is already 0 nothing is scanned and the flags still hold
; the outcome of the failed compare below, so the jnz is taken)
jnz try_again_opt ;no match, try the next line
cmp cx,dx ;compare lengths, a full match is not
jb try_again_opt ; possible if CX < DX.
push di ;save addrs. of next char. in the line
push cx ;save the # of chars. left in the line
mov cx,dx ;get the adjusted string arg. length
jcxz a_match ;if a single char string, then match!
repz cmpsb ;compare string with line
jz a_match ;a match found, hurrah!
pop cx ;no match, get # of chars remaining
; in the line.
pop di ;position of the next char. in the line
jmp another_char
;----- A MATCH: CHECK FOR THE v OPTION ------------------------------;
a_match:
pop ax ;adjust stack (drop the saved CX and DI)
pop ax
ENDIF
; A matching line. With the v flag set matching lines are skipped.
cmp ds:v_flg,0ffh ;is flag set?
jne prt_line ;no, print the line
jmp try_again
;----- NO MATCH: CHECK FOR THE v OPTION -----------------------------;
; A line without a match is of interest only when the v flag is set.
try_again_opt:
        cmp     ds:v_flg,0ffh           ;is flag set?
        jne     try_again               ;no: on to the next line
;----- REPORT THE SELECTED LINE -------------------------------------;
; On entry BP points to the beginning of the current line, BX has the
; length of the line including the CRLF and DX the adjusted length of
; the string argument (kept intact for try_again).
; With the count-only flag set the line is just counted; otherwise it
; is written to standard output, after its line number if the n flag
; is set.
prt_line:
        cmp     ds:c_flg,0ffh           ;is count only flag set?
        je      count_only
        push    dx                      ;save the adjusted string arg. length
        cmp     ds:n_flg,0ffh           ;is line number flag set?
        jne     no_n_flg
        call    prt_lcntr
no_n_flg:
        mov     cx,bx                   ;CX = length of the line
        mov     dx,bp                   ;DX -> the line itself
        call    prout
        pop     dx                      ;restore
        jmp     try_again
count_only:
        inc     ds:mtch_cntr            ;one more selected line
        jmp     try_again
;----- READ MORE TEXT LINES INTO THE BUFFER -------------------------;
; The scanning routines have detected that the buffer does not
;contain a full line any more. More lines have to be read into the
;buffer. But first perform a seek on the file in order to re-read
;the non-complete line into the beginning of the buffer.
; Upon entry BP points to the beginning of the non-complete
;line, and BX has the number of characters left in the buffer.
; The Stack contains (top of stack first):
; - File handle.
; - The saved DS (segment of the command line).
; - The saved SI (pointer to the next argument in the command line).
;   (the last two only when the input is a file)
more_stuff:
        mov     dx,bx                   ;get chars left in buffer
        pop     bx                      ;get the handle
        or      dx,dx                   ;are there 0 left?
        jz      no_seek                 ;yes, do not seek
        neg     dx                      ;form two's complement
        mov     cx,-1                   ;CX:DX = -(chars left)
        mov     al,1                    ;seek from the current position
        mov     ah,lseek                ;seek on file
        int     dos_ent
        jc      read_error
no_seek:
        jmp     refill                  ;no errors: refill the buffer
;----- READ ERROR ---------------------------------------------------;
; Entered with the handle in BX. Errors on standard input are not
;reported. For a file: close it, then print on standard error the
;first part of the message, the file name and the last part of the
;message (the last part is printed at r_error).
read_error:
        cmp     bx,std_in               ;Using STD IN?
        je      foo                     ;if so: all done, exit
        mov     ah,close                ;close the file
        int     dos_ent
        mov     dx,offset errmsg4_pre   ;read error
        mov     cl,cs:errlen4_pre
        call    prt_err_2               ;print it BEFORE DX and CX are
                                        ; reloaded for the file name
        call    prt_file_name           ;print the file name in error
        mov     dx,offset errmsg4_post  ;read error
        mov     cl,cs:errlen4_post
        jmp     r_error
;----- PRINT ERRORS -------------------------------------------------;
; open_error: the open failed. Prints the first part of the message,
; the file name and the last part of the message on standard error.
; r_error:    prints the message at DX (length in CL) and carries on
; with the rest of the command line.
open_error:
        mov     cl,cs:errlen3_pre
        mov     dx,offset errmsg3_pre   ;error in open operation
        call    prt_err_2
        call    prt_file_name           ;the file name in error
        mov     cl,cs:errlen3_post
        mov     dx,offset errmsg3_post
r_error:
        call    prt_err_2
;----- SCAN THE REST OF THE COMMAND LINE ----------------------------;
; Takes back the command line position saved at done_move and goes
; for the next file name, if there is one.
scan_rest:
        pop     ds                      ;segment of the command line
        pop     si                      ;pointer to the next argument
        call    kill_bl                 ;look for further args.
        test    bx,bx                   ;stopped on a CR?
        jnz     foo                     ;yes: nothing else to do
        jmp     further_args
foo:
        mov     al,0                    ;Proper code
done:
        mov     ah,exit                 ;leave with the code held in AL
        int     dos_ent
;--------------------------------------------------------------------;
; Skip blanks in the command line. ;
; Moves SI forward to the next character that is not a blank and ;
; tells the caller whether that character is a CR. ;
; ;
; entry: ;
; DS:SI points to the first character to look at. ;
; ;
; exit: ;
; SI points to the first non-blank character found. ;
; BX is zero unless that character is a Carriage Return; ;
; in that case BX is a copy of AX (low byte 0D hex), ;
; hence non-zero. ;
; The direction flag is left cleared. ;
; ;
; modifies: ;
; BX, SI, and AX ;
; ;
;--------------------------------------------------------------------;
kill_bl:
        xor     bx,bx                   ;assume that no CR will be found
        cld                             ;strings go upwards
no_bl:
        lodsb                           ;next character
        cmp     al,' '
        jz      no_bl                   ;a blank: keep going
        cmp     al,CR
        jne     no_cr
        mov     bx,ax                   ;a CR: make BX non-zero
no_cr:
        dec     si                      ;back onto the character found
        ret
;--------------------------------------------------------------------;
; Clear Counters ;
; ;
; Sets mtch_cntr and line_cntr to zero. Both are WORD counters, ;
; so the whole word has to be cleared: clearing the low byte only ;
; would leave the high byte of a count of 256 or more behind for ;
; the next file. ;
; ;
; entry: ES addresses the segment holding the counters. ;
; modifies: nothing but the two counters. ;
;--------------------------------------------------------------------;
clr_cntrs:
        mov     word ptr es:mtch_cntr,0
        mov     word ptr es:line_cntr,0
        ret
;--------------------------------------------------------------------;
; Print Count of Matched lines ;
; ;
; Writes mtch_cntr in decimal, followed by a CR/LF, to standard ;
; output. When the input is a file the number is preceded by the ;
; text at "colon". ;
; ;
; Entry: BX input handle (DS = ES = this segment) ;
; Exit: BX preserved ;
; Modifies: AX,CX,DX and DI ;
;--------------------------------------------------------------------;
print_count:
        push    bx                      ;prout and bin2asc change BX
        cmp     bx,std_in               ;reading standard input?
        je      sj3                     ;yes: no file name was printed
        mov     cx,2
        mov     dx,offset colon
        call    prout
sj3:
        mov     di,offset n2_buf        ;where the digits go
        mov     ax,ds:mtch_cntr
        call    bin2asc                 ;CX = number of digits
        mov     dx,offset n2_buf
        call    prout                   ;print the number
        mov     cx,2
        mov     dx,offset crlf
        call    prout                   ;print an end of line
        pop     bx
        ret
;--------------------------------------------------------------------;
; Print relative line number ;
; ;
; Writes line_cntr to standard output as "[number]". The digits ;
; are built in n2_buf, which sits right after the "[" in n1_buf, ;
; so the whole thing goes out in a single write. ;
; ;
; Exit: BX and DX preserved ;
; Modifies: AX,CX and DI ;
;--------------------------------------------------------------------;
prt_lcntr:
        push    bx
        push    dx
        mov     di,offset n2_buf
        mov     ax,ds:line_cntr
        call    bin2asc                 ;CX = digits, DI -> past the last one
        mov     byte ptr[di],"]"
        add     cx,2                    ;count the two brackets as well
        mov     dx,offset n1_buf        ;start at the opening bracket
        call    prout
        pop     dx
        pop     bx
        ret
;--------------------------------------------------------------------;
; Print string to STD_OUT ;
; ;
; Entry: DS:DX points to the text ;
; CX number of bytes to write ;
; Modifies: AX and BX (BX is left holding std_out) ;
;--------------------------------------------------------------------;
prout:
        mov     ah,write
        mov     bx,std_out
        int     dos_ent
        ret
;--------------------------------------------------------------------;
; Binary to Ascii conversion routine ;
; ;
; Converts the unsigned word in AX to decimal digits. The digits ;
; are produced least significant first, parked on the stack and ;
; then stored in the buffer in the right order. ;
; ;
; Entry: ;
; AX Binary number ;
; ES:DI Points to the FIRST byte of the result buffer ;
; (the digits are stored with STOSB, so the ;
; direction flag is expected to be clear). ;
; ;
; Exit: ;
; Result in the buffer MSD first ;
; CX Digit count ;
; DI Points to one past the last digit stored ;
; ;
; Modifies: ;
; AX,BX,CX,DX and DI ;
; ;
;--------------------------------------------------------------------;
bin2asc:
        xor     cx,cx                   ;no digits yet
        mov     bx,10                   ;the radix
go_div:
        inc     cx                      ;one more digit on its way
        cmp     ax,bx
        jb      div_done                ;below 10: this is the leading digit
        xor     dx,dx                   ;DX:AX = what is left of the number
        div     bx                      ;AX = quotient, DX = remainder
        add     dl,'0'                  ;convert to ASCII
        push    dx                      ;park it
        jmp     short go_div
div_done:
        add     al,'0'
        push    ax                      ;the leading digit goes on last
        mov     bx,cx                   ;keep the count, LOOP uses up CX
deposit:
        pop     ax                      ;digits come off MSD first
        stosb
        loop    deposit
        mov     cx,bx                   ;give the count back in CX
        ret
;--------------------------------------------------------------------;
; Print the current file name ;
; ;
; Writes the contents of file_name_buf to the standard error by ;
; jumping into prt_err_2, whose RET goes back to our caller. ;
; ;
; entry: ;
; DS addresses this segment ;
; ;
; modifies: ;
; DX, CX, BX and AX ;
;--------------------------------------------------------------------;
prt_file_name:
        mov     cx,ds:file_name_len     ;length of the name
        mov     dx,offset file_name_buf ;and where it is
        jmp     short prt_err_2
;--------------------------------------------------------------------;
; Print an error message to the Standard error ;
; ;
; prt_err: for use while DS does not address this segment; ;
; DS is saved, pointed at CS for the write and ;
; restored afterwards. ;
; prt_err_2: for use when DS is already right. ;
; ;
; entry: ;
; DX has the pointer to the message ;
; CL has the length of the message (CH is cleared ;
; here, so at most 255 bytes are written) ;
; ;
; modifies: ;
; BX, AX and CH ;
;--------------------------------------------------------------------;
prt_err:
        push    ds                      ;keep the caller's DS
        push    cs
        pop     ds                      ;the messages are addressed through CS
        call    prt_err_2
        pop     ds
        ret
prt_err_2:
        mov     ah,write
        mov     bx,std_err
        xor     ch,ch                   ;the length is a byte value
        int     dos_ent                 ;write error message
        ret
;--------------------------------------------------------------------;
; CAPITALIZES THE CHARACTER IN AL ;
; ;
; Only the letters 'a' to 'z' are changed. Both range checks are ;
; UNSIGNED: with a signed test on the upper bound the bytes 80h ;
; to 0FFh would look "not greater than 'z'" and get bit 5 cleared ;
; as well. ;
; ;
; entry: ;
; AL has the character to Capitalize ;
; ;
; exit: ;
; AL has the capitalized character ;
; ;
; modifies: ;
; AL ;
;--------------------------------------------------------------------;
make_caps:
        cmp     al,'a'
        jb      no_cap                  ;below 'a': leave alone
        cmp     al,'z'
        ja      no_cap                  ;above 'z' (unsigned): leave alone
        and     al,0dfh                 ;clear bit 5: lower -> upper case
no_cap:
        ret
IF KANJI
;--------------------------------------------------------------------;
; ADVANCE POINTER TO NEXT KANJI CHARACTER ;
; ;
; Steps DI over one character: two bytes when is_prefix reports ;
; a prefix byte at DI, one byte otherwise. ;
; ;
; entry: DI points to a Kanji string ;
; CX length in bytes of the string ;
; ;
; exit: Carry clear: DI points to next Kanji char ;
; CX has number of bytes left ;
; Carry set: ran out of bytes (CX reached zero) ;
; ;
; modifies: AX ;
; ;
;--------------------------------------------------------------------;
next_kchar:
        jcxz    no_kleft                ;nothing left to step over
        call    is_prefix               ;carry set = prefix byte at DI
        jae     no_p                    ;carry clear: a one byte char
        inc     di                      ;step over the prefix byte
        dec     cx
        jcxz    no_kleft                ; for insurance
no_p:
        inc     di
        dec     cx
        clc                             ;done, carry clear
        ret
no_kleft:
        stc                             ;no more bytes, carry set
        ret
;--------------------------------------------------------------------;
; FIND OUT IF THE BYTE IS A KANJI PREFIX ;
; ;
; A byte counts as a prefix when it lies in 81h-9Fh or in ;
; 0E0h-0FCh. ;
; ;
; entry: DI points to a kanji string (read through DS) ;
; ;
; exit: Carry set if it is a kanji prefix, clear if not ;
; ;
; modifies: AX ;
; ;
;--------------------------------------------------------------------;
; The unsigned compares already leave the carry flag equal to "AL is
; below the bound", so the two "is a prefix" exits can return with the
; carry just as the compare left it; only the "not a prefix" exits
; reached with the carry set need it cleared.
is_prefix:
        mov     al,byte ptr [di]
        cmp     al,81h
        jb      nok                     ;00h-80h: not a prefix
        cmp     al,0a0h
        jb      isk                     ;81h-9Fh: prefix, carry is set
        cmp     al,0e0h
        jb      nok                     ;0A0h-0DFh: not a prefix
        cmp     al,0fdh                 ;carry set for 0E0h-0FCh only
isk:
        ret
nok:
        clc
        ret
ENDIF
;----- PATCH AREA ---------------------------------------------------;
; 100h uninitialized words set aside for bug fixing (see Rev. 7 in the
;revision history).
patch_area dw 100h dup(?)
;----- BUFFER AREA --------------------------------------------------;
st_length dw 0 ;String argument length
st_buffer db st_buf_size dup(?) ;String argument buffer
file_name_len dw 0 ;File name length
file_name_buf db fname_buf_size+1 dup(?) ;File name buffer,(allow for
; null at the end).
buffer db buffer_size+1 dup(?) ;file buffer, the last byte is
;a guard in case of forced insertion
;of a CRLF pair (see go_match).
;----- ERROR MESSAGES -----------------------------------------------;
; The message texts and their one-byte lengths are defined in another
;module. errmsg5_opt is written by the option scanner with the
;offending option letter before errmsg5 is printed.
EXTRN bad_vers:byte,crlf:byte,errmsg1:byte,errlen1:byte,errmsg2:byte
EXTRN errmsg3_pre:byte,errlen3_pre:byte
EXTRN errmsg3_post:byte,errlen3_post:byte
EXTRN errmsg4_pre:byte,errlen4_pre:byte
EXTRN errmsg4_post:byte,errlen4_post:byte
EXTRN heading:byte,heading_len:byte,errlen2:byte
EXTRN errmsg5:byte,errmsg5_opt:byte,errlen5:byte
code ends
;----- STACK AREA ---------------------------------------------------;
; 64 pairs of words = 256 bytes of stack. Execution begins at "start".
stack segment stack
dw 64 dup(?,?)
stack_top equ $ ;offset just past the end of the stack area
stack ends
end start