mirror of
https://github.com/microsoft/MS-DOS.git
synced 2024-12-01 18:15:47 +00:00
420 lines
17 KiB
NASM
420 lines
17 KiB
NASM
|
TITLE SORT FILTER FOR MS-DOS
|
|||
|
;
|
|||
|
; Sort /R /+n
|
|||
|
; /R -> reverse sort
|
|||
|
; /+n -> sort on column n
|
|||
|
;
|
|||
|
; Written by: Chris Peters
|
|||
|
;
|
|||
|
; Modification History:
|
|||
|
; 3-18-83 MZ Fix CR-LF at end of buffer
|
|||
|
; Fix small file sorting
|
|||
|
; Fix CR-LF line termination bug
|
|||
|
; Comment the Damn source
|
|||
|
;
|
|||
|
FALSE   EQU     0                       ; all bits clear
TRUE    EQU     NOT FALSE               ; all bits set

; "internat" chooses the comparison used by the inner sort loop:
;   TRUE  -> every byte is translated through "table" before it is compared
;   FALSE -> plain REPZ CMPSB
; It must be FALSE when building the KANJI version.
internat equ    true

        .xlist                          ; listing off around the include
        .xcref                          ; cross-reference off as well
        INCLUDE DOSSYM.ASM              ; presumably supplies the DOS call names used with "sys"
        .cref
        .list
|
|||
|
|
|||
|
; sys func -- load the DOS function number "func" into AH and issue INT 21h.
; Every other register must already hold what that function expects.
sys     MACRO   func
        MOV     AH,func
        INT     21h
        ENDM
|
|||
|
; save <r1,r2,...> -- PUSH each register in the list, left to right.
save    MACRO   regs
        IRP     rg,<regs>
        PUSH    rg
        ENDM
        ENDM
|
|||
|
; restore <r1,r2,...> -- POP each register in the list, left to right.
; The list must therefore be written in the reverse order of the matching
; "save" (save <SI,DI> pairs with restore <DI,SI>).
restore MACRO   regs
        IRP     rg,<regs>
        POP     rg
        ENDM
        ENDM
|
|||
|
|
|||
|
MAXREC  EQU     256                     ; size in bytes of the scratch record

; Layout of the allocated work block
SPACE   EQU     0                       ; scratch record starts at offset zero
BUFFER  EQU     MAXREC                  ; file data starts right after it
|
|||
|
|
|||
|
SUBTTL  Segments used in load order

; These first declarations of CODE and CONST are empty; together with CSTACK
; they exist to fix the order in which the segments are laid out:
; CODE, then CONST, then CSTACK.
CODE    SEGMENT
CODE    ENDS

CONST   SEGMENT PUBLIC BYTE
CONST   ENDS

CSTACK  SEGMENT STACK
        DB      128 DUP (0)             ; zero-filled initial stack
CSTACK  ENDS

DG      GROUP   CODE,CONST,CSTACK       ; all three segments form the group DG
|
|||
|
|
|||
|
CODE    SEGMENT
        ASSUME  CS:DG,DS:NOTHING,ES:NOTHING,SS:CSTACK

; Both variables live in the code segment.  With DS assumed NOTHING every
; reference to them is made relative to CS.
COLUMN  DW      0                       ; /+n value while parsing; afterwards the
                                        ; byte offset of the sort key in a record
SWITCH  DB      '/'                     ; switch character (replaced by the one
                                        ; DOS reports at OKDOS)
|
|||
|
|
|||
|
SORT:
;
; Program entry.  Require DOS version 2.00 or later.
;
        sys     GET_VERSION
        XCHG    AH,AL                   ; swap the bytes so AX reads major.minor
        CMP     AX,200H                 ; at least 2.00?
        JAE     OKDOS                   ; yes, carry on
;
; DOS is too old: print BADVER, then leave through a far return to ES:0000
; (ES is presumably still the PSP segment it held at program entry).
;
        PUSH    CS
        POP     DS                      ; DS:DX must address the message
        MOV     DX,OFFSET DG:BADVER
        sys     STD_CON_STRING_OUTPUT
        PUSH    ES                      ; far return segment
        PUSH    COLUMN                  ; far return offset: COLUMN is still 0 here
LONG_RET PROC   FAR
        RET                             ; far return to the address just pushed
LONG_RET ENDP
|
|||
|
;
|
|||
|
; get proper switch character
|
|||
|
;
|
|||
|
OKDOS:
        MOV     AL,0                    ; subfunction 0: get the switch character
        sys     CHAR_OPER
        MOV     SWITCH,DL               ; keep it for the command-line parser
;
; Set up to walk the command line: SI -> text, CX = characters remaining.
; The byte at offset 80h is the length; the text follows it.
;
        XOR     CX,CX                   ; clear CH so CX can take a byte count
        CLD                             ; string instructions move upward
        MOV     SI,80H                  ; offset of the length byte
        LODSB                           ; AL = length, SI now at the first character
        MOV     CL,AL                   ; CX = length of command line
|
|||
|
SWITCH_LOOP:
;
; Scan for the switch character and act on the character after it.
; GET_CHAR does not return once the command line is used up, so this loop
; has no exit test of its own.
;
        CALL    GET_CHAR                ; next command-line character
        CMP     AL,SWITCH               ; start of a switch?
        JNZ     SWITCH_LOOP             ; no, keep looking
        CALL    GET_CHAR                ; the character naming the switch
        CMP     AL,'+'                  ; "+n": key column
        JZ      SWITCH_NUMBER
        OR      AL,20h                  ; fold upper case onto lower case
        CMP     AL,'r'                  ; "R": reverse sort
        JNZ     SWITCH_LOOP             ; any other switch is ignored
        MOV     CS:CODE_PATCH,72h       ; 72h is the JB opcode; it replaces the
                                        ; JAE stored at CODE_PATCH
        JMP     SWITCH_LOOP             ; look for further switches
|
|||
|
SWITCH_NUMBER:
;
; Parse the decimal number of a "+n" switch into COLUMN.
;
; In:   SI -> first character after the '+', CX = characters remaining
; Out:  COLUMN = value of the digits (0 if there were none); the value is
;       kept modulo 65536, the high word of the multiply is discarded.
;       Control returns to SWITCH_LOOP positioned ON the character that
;       ended the number, so a switch written directly after the digits
;       (for example "/+3/R") is still recognised.  Previously that
;       character was swallowed and the following switch was lost.
;
        MOV     COLUMN,0                ; start off at 0
SWITCH_NEXT_NUMBER:
        CALL    GET_CHAR                ; supposed digit (no return at end of line)
        SUB     AL,'0'                  ; convert to a number
        JB      SWITCH_UNGET            ; below '0': not a digit
        CMP     AL,9
        JA      SWITCH_UNGET            ; above '9': not a digit
        CBW                             ; AX = digit
        MOV     BX,AX                   ; park it while AX is used by MUL
        MOV     AX,10
        MUL     COLUMN                  ; DX:AX = previous value * 10
        ADD     AX,BX                   ; add in the new low-order digit
        MOV     COLUMN,AX
        JMP     SWITCH_NEXT_NUMBER
;
; Not a digit: give the character back (direction flag is clear, so GET_CHAR
; advanced SI by one) and let the switch scanner look at it.
;
SWITCH_UNGET:
        DEC     SI
        INC     CX
        JMP     SWITCH_LOOP
|
|||
|
GET_CHAR:
;
; Fetch the next command-line character.
;
; In:   SI -> next character, CX = characters remaining
; Out:  AL = character, SI and CX stepped past it
; When CX is already zero this routine does NOT return: it throws away its
; return address and drops into the code that follows END_GET.
;
        JCXZ    END_GET                 ; nothing left on the line
        LODSB                           ; AL = character
        DEC     CX                      ; one fewer remaining
        RET
END_GET:
        POP     AX                      ; discard the caller's return address
|
|||
|
;
|
|||
|
; set up column for proper sort offset
|
|||
|
;
|
|||
|
END_SWITCH:
;
; Turn the parsed column number n into a byte offset inside a record:
;   n = 0 (no "+n" given)  ->  COLUMN = 2
;   any other n            ->  COLUMN = n + 1
;
        INC     COLUMN                  ; n + 1
        CMP     COLUMN,1                ; was n zero?
        JNZ     GOT_COL                 ; no, n + 1 is the offset
        INC     COLUMN                  ; yes, use offset 2
|
|||
|
|
|||
|
;
|
|||
|
; Get sorting area, no more than 64K
|
|||
|
;
|
|||
|
GOT_COL:
;
; Ask for 64K.  If DOS refuses, BX comes back holding the amount that is
; free, so ask again for exactly that; give up only when nothing is free.
;
        MOV     BX,1000H                ; 1000h paragraphs = 64K bytes
GET_MEM:
        sys     ALLOC
        JNC     GOT_MEM                 ; allocation succeeded
        TEST    BX,BX                   ; is anything free at all?
        JNZ     GET_MEM                 ; yes, retry with BX paragraphs
        JMP     SIZERR                  ; no, report the error
|
|||
|
|
|||
|
GOT_MEM:
;
; In:   AX = segment of the allocated block, BX = its size in paragraphs
; Out:  DS = ES = block segment, BX = its size in bytes (a full 64K block
;       wraps to 0, which the subtraction in the read set-up undoes)
;
; The block must hold the MAXREC-byte scratch record plus the first 2-byte
; link word.  GET_MEM accepts whatever DOS has left, so a tiny block is
; possible; without this check the scratch fill would run past the block
; and the read count (bytes - (MAXREC+2)) would wrap to a huge value.
;
        CMP     BX,(MAXREC+2+15)/16     ; enough paragraphs for MAXREC+2 bytes?
        JAE     GOT_MEM_OK
        JMP     SIZERR                  ; no, report not enough memory
GOT_MEM_OK:
        MOV     DS,AX                   ; point DS to the buffer
        MOV     ES,AX                   ; and ES as well (string destinations)
        MOV     CL,4                    ; 2^4 bytes per paragraph
        SHL     BX,CL                   ; paragraphs -> bytes
|
|||
|
|
|||
|
;
|
|||
|
; Blank-fill the scratch record at offset SPACE.  The sort compares against
; this record in place of any line that is too short to reach the key
; column, so it has to contain nothing but spaces.
;
; The fill word is spelled 2020h on purpose: a one-character constant ' '
; is 0020h, which STOSW would store as a space followed by a NUL.
;
        MOV     CX,MAXREC/2             ; size of the record in words
        MOV     AX,2020H                ; two ASCII spaces
        MOV     DI,SPACE                ; ES:DI -> start of the record
        REP     STOSW                   ; fill it
|
|||
|
;
|
|||
|
; Read all of standard input into the buffer.
;
; In:   BX = size of the block in bytes, DS = block segment
; Out:  DX = offset just past the last byte read (continues at SIZOK)
; The first two bytes at BUFFER are left free for the first link word.
; NOTE(review): the carry flag returned by READ is never tested, so an error
; code in AX would be treated as a byte count -- confirm whether that matters.
;
        MOV     DX,BUFFER + 2           ; DS:DX = where the text goes
        LEA     CX,[BX-(MAXREC + 2)]    ; CX = room left after scratch + link
SORTL:
        XOR     BX,BX                   ; handle 0 = standard input
        sys     READ                    ; AX = number of bytes read
        ADD     DX,AX                   ; advance the fill pointer
        SUB     CX,AX                   ; shrink the room left
        JZ      SIZERR                  ; buffer completely full -> error
        TEST    AX,AX                   ; zero bytes read means end of file
        JNZ     SORTL                   ; otherwise keep reading
        JMP     SHORT SIZOK             ; go find the real end of the text
|
|||
|
SIZERR:
        MOV     SI,OFFSET DG:ERRMSG     ; the not-enough-memory message
;
; ERROR_EXIT -- write a message to standard error and exit with code 1.
;
; In:   SI = offset in the code group of the message; the message starts
;       with a word giving its length, followed by the text.
;       The direction flag must be clear (LODSW is used).
;
ERROR_EXIT:
        PUSH    CS
        POP     DS                      ; message is addressed through CS
        MOV     BX,2                    ; handle 2 = standard error
        LODSW                           ; AX = length, SI -> text
        MOV     CX,AX                   ; byte count for WRITE
        MOV     DX,SI                   ; DS:DX -> text
        sys     WRITE
        MOV     AL,1                    ; exit code 1 = failure
        sys     EXIT
|
|||
|
|
|||
|
;
|
|||
|
; Look for a ^Z. Terminate buffer at 1st ^Z.
|
|||
|
;
|
|||
|
SIZOK:
;
; Locate the end of the text and mark it with a zero word.
;
; In:   DX = offset just past the last byte read, DS = ES = buffer segment,
;       direction flag clear
; Out:  BX = BP = offset of the end mark.  The text stops at the first ^Z if
;       there is one, otherwise at the last byte read; a CR LF standing
;       immediately before that point is dropped as well.
;
; Three cases are handled explicitly:
;   * nothing was read: REPNZ SCASB with CX = 0 leaves the flags alone, so
;     the scan must be skipped or the zero result of the SUB reads as
;     "^Z found";
;   * fewer than two bytes of text: there cannot be a trailing CR LF, and
;     probing for one would read in front of the text;
;   * text that ends in neither ^Z nor CR LF: the end is exactly where the
;     data stops, so no byte beyond the data is examined or included.
;
        MOV     BX,DX                   ; assume the text runs to the last byte
        MOV     CX,DX
        SUB     CX,BUFFER+2             ; CX = number of bytes read
        JZ      GOTEND                  ; empty input: end mark goes at BUFFER+2
        MOV     AL,1AH                  ; ^Z
        MOV     DI,BUFFER+2             ; ES:DI -> start of text
        REPNZ   SCASB                   ; look for the first ^Z
        JNZ     NoBack                  ; none: CX = 0 and BX is already right
        DEC     BX                      ; found: leave the ^Z itself out
NoBack:
        SUB     BX,CX                   ; leave out everything after the ^Z
        CMP     BX,BUFFER+4             ; at least two bytes of text?
        JB      GOTEND                  ; no, nothing more to trim
        CMP     WORD PTR [BX-2],0A0Dh   ; does the text end in CR LF?
        JNZ     GOTEND                  ; no, BX is the end
        SUB     BX,2                    ; yes, the end mark replaces it
GOTEND:
        MOV     BP,BX                   ; BP = end of text
        MOV     WORD PTR DS:[BP],0      ; zero word marks the end
|
|||
|
;
|
|||
|
; Turn the text into a chain of records.  Each record starts with a link
; word that holds the distance in bytes to the next link (the line length
; plus 2).  The first link is the spare word at BUFFER; every later link
; overwrites the CR LF that ended the previous line.
;
; In:   BP = offset of the end mark, DS = ES = buffer segment
; Out:  zero link stored after the last record, BP = offset just past that
;       zero link, DI = BUFFER
;
        MOV     BX,BUFFER               ; BX -> link word being filled in
        MOV     DI,BUFFER+2             ; DI -> text of the current line
REPLACE_LOOP:
        MOV     CX,BP
        SUB     CX,DI
        INC     CX                      ; CX = bytes from DI up to and including [BP]
        MOV     AL,13                   ; scan for CR
REPLACE_SCAN:
        REPNZ   SCASB
        JNZ     REPLACE_SKIP            ; ran out of bytes without finding one
        CMP     BYTE PTR [DI],10        ; is the CR followed by LF?
        JNZ     REPLACE_SCAN            ; lone CR: keep scanning
REPLACE_SKIP:
        LEA     AX,[DI-1]               ; AX -> the CR (or the last byte scanned)
        PUSH    AX                      ; that spot becomes the next link
        SUB     AX,BX                   ; distance from this link to the next
        MOV     [BX],AX                 ; store it in this link
        POP     BX                      ; BX -> next link
        INC     DI                      ; step over the LF
        JCXZ    END_REPLACE_LOOP        ; nothing left to scan
        JMP     REPLACE_LOOP

END_REPLACE_LOOP:
        MOV     WORD PTR [BX],0         ; zero link ends the chain
        LEA     BP,[BX+2]               ; BP = just past the zero link
        MOV     DI,BUFFER               ; DI = start of the unsorted section
|
|||
|
|
|||
|
;
|
|||
|
; begin sort. Outer loop steps over all unsorted lines
|
|||
|
;
|
|||
|
OUTER_SORT_LOOP:
;
; One pass per position: DI = first record of the unsorted section.  The
; inner loop leaves in BX the record that belongs at DI.
;
        MOV     BX,DI                   ; best so far = the first unsorted record
        MOV     SI,DI                   ; SI = link used to walk the rest
        CMP     WORD PTR [DI],0         ; zero link: no records left
        JNZ     INNER_SORT_LOOP         ; more to do
        JMP     END_OUTER_SORT_LOOP     ; finished, go write the result
|
|||
|
|
|||
|
;
|
|||
|
; Walk the records after the current one, looking for a better candidate.
;
;   BX = link of the best record found so far
;   SI = link of the record being examined
;   DI = link of the first unsorted record (preserved)
;
; A record is compared from byte offset COLUMN onwards.  A record that does
; not reach past that offset is replaced, for the comparison only, by the
; scratch record at SPACE with a length of MAXREC.
;
INNER_SORT_LOOP:
        ADD     SI,[SI]                 ; follow the link to the next record
        MOV     AX,[SI]                 ; AX = its link (length + 2)
        TEST    AX,AX                   ; zero link marks the end of the chain
        JZ      END_INNER_SORT_LOOP
        PUSH    SI                      ; keep both head pointers
        PUSH    DI
        MOV     DI,BX                   ; DI = best so far
        SUB     AX,COLUMN               ; AX = candidate bytes past the key offset
        JA      AXOK                    ; it has some
        MOV     SI,SPACE                ; it has none: use the scratch record
        MOV     AX,MAXREC
AXOK:
        MOV     DX,[DI]                 ; same treatment for the best record
        SUB     DX,COLUMN               ; DX = its bytes past the key offset
        JA      DXOK
        MOV     DI,SPACE
        MOV     DX,MAXREC
DXOK:
        CMP     AX,DX
        MOV     CX,AX                   ; CX = smaller of AX and DX
        JB      SMALL
        MOV     CX,DX
SMALL:
        ADD     DI,COLUMN               ; start both at the key column
        ADD     SI,COLUMN
        if      internat
        PUSH    BX                      ; XLAT needs BX and AL
        PUSH    AX
        MOV     BX,OFFSET DG:TABLE      ; translation table in the code group
TLOOP:  LODSB                           ; next candidate byte
        XLAT    BYTE PTR CS:[BX]        ; translate it
        MOV     AH,AL
        MOV     AL,ES:[DI]              ; matching byte of the best record
        INC     DI
        XLAT    BYTE PTR CS:[BX]        ; translate that one too
        CMP     AH,AL                   ; candidate against best
        LOOPZ   TLOOP                   ; stop at a difference or after CX bytes
        POP     AX                      ; POP leaves the flags untouched
        POP     BX
        else
        REPZ    CMPSB                   ; compare candidate against best
        endif
        POP     DI                      ; head pointers back, flags intact
        POP     SI
        JNZ     TESTED_NOT_EQUAL        ; a byte differed: flags already decide
        CMP     AX,DX                   ; equal so far: decide on the lengths
TESTED_NOT_EQUAL:
;
; The JAE below keeps the current best unless the candidate compared lower.
; For a reverse sort the switch parser overwrites the opcode byte at
; CODE_PATCH with 72h, turning the JAE into a JB.
;
CODE_PATCH LABEL BYTE
        JAE     INNER_SORT_LOOP         ; candidate not better: try the next one
        MOV     BX,SI                   ; candidate is the new best
        JMP     INNER_SORT_LOOP
|
|||
|
|
|||
|
END_INNER_SORT_LOOP:
;
; Move the best record (BX) to the front of the unsorted section (DI).
;
; In:   BX = link of the best record, DI = link of the first unsorted
;       record, BP = offset just past the zero link ending the chain
; Out:  DI = link of the next unsorted record; continues at OUTER_SORT_LOOP
;
        MOV     SI,BX                   ; SI = best record
        CMP     SI,DI
        JZ      END_INSERT              ; already in place
;
; Step 1: open a gap of DX bytes at DI by sliding everything from DI to the
; end of the chain upward.  The copy runs from the top down because source
; and destination overlap.
; NOTE(review): this writes DX bytes beyond BP; nothing here checks that the
; block has that much room left -- confirm.
;
        MOV     DX,[SI]                 ; DX = size of the record, link included
        PUSH    SI
        PUSH    DI
        STD                             ; copy downward
        MOV     CX,BP
        SUB     CX,DI                   ; CX = bytes from DI to the end
        LEA     SI,[BP-1]               ; source: last byte of the chain
        MOV     DI,SI
        ADD     DI,DX                   ; destination: DX bytes higher
        REP     MOVSB
        CLD                             ; back to upward copies
        POP     DI
        POP     SI
;
; Step 2: copy the record into the gap.  It moved up by DX in step 1.
;
        PUSH    DI                      ; keep the destination for later
        ADD     SI,DX                   ; where the record is now
        PUSH    SI
        MOV     CX,DX
        REP     MOVSB
        POP     SI                      ; SI = the record's old (shifted) place
;
; Step 3: close the hole at SI by pulling everything above it down DX bytes.
; The destination saved in step 2 stays on the stack until this is done.
;
        MOV     CX,BP
        ADD     CX,DX
        SUB     CX,SI                   ; bytes from the hole to the raised end
        INC     CX
        SHR     CX,1                    ; rounded up to whole words
        MOV     DI,SI                   ; destination: the hole
        ADD     SI,DX                   ; source: just above it
        REP     MOVSW
        POP     DI                      ; DI = where the record was inserted
        MOV     WORD PTR DS:[BP-2],0    ; put the zero end link back

END_INSERT:
        ADD     DI,[DI]                 ; next unsorted record
        JMP     OUTER_SORT_LOOP
|
|||
|
;
|
|||
|
; PUT BACK IN THE CR-LF
|
|||
|
;
|
|||
|
END_OUTER_SORT_LOOP:
;
; Sorting is done.  Walk the chain and overwrite every link after the first
; with CR LF, including the zero link that ends the chain.  The first link,
; at BUFFER, is left alone.
;
        MOV     DI,BUFFER               ; first link
        MOV     CX,[DI]                 ; distance to the second

INSERT_LOOP:
        ADD     DI,CX                   ; DI -> next link
        MOV     CX,[DI]                 ; remember where it points
        MOV     WORD PTR [DI],0A0DH     ; CR LF in its place
        TEST    CX,CX                   ; was that the zero end link?
        JNZ     INSERT_LOOP             ; no, keep going
|
|||
|
|
|||
|
WRITE_FILE:
;
; Write the sorted text, from BUFFER+2 up to BP, to standard output with a
; single WRITE call, then exit: code 0 on success, or through ERROR_EXIT
; with ERRMSG2 if the call fails or writes fewer bytes than requested.
;
        MOV     DX,BUFFER+2             ; DS:DX -> first byte of text
        MOV     CX,BP
        SUB     CX,DX                   ; CX = number of bytes to write
        MOV     BX,1                    ; handle 1 = standard output
        sys     WRITE
        JC      BADWRT                  ; DOS reported an error
        CMP     AX,CX                   ; everything written?
        JNZ     BADWRT                  ; no, short write
WRTOK:
        XOR     AL,AL                   ; exit code 0
        sys     EXIT
BADWRT:
        MOV     SI,OFFSET dg:ERRMSG2    ; the write-error message
        JMP     ERROR_EXIT
|
|||
|
|
|||
|
CODE    ENDS

; The messages, and the translation table of the "internat" build, are
; defined in another module.  ERROR_EXIT reads ERRMSG and ERRMSG2 as a
; length word followed by the text.
CONST   SEGMENT PUBLIC BYTE
        EXTRN   BADVER:BYTE,ERRMSG:BYTE,ERRMSG2:BYTE
        if      internat
        extrn   table:byte
        endif
CONST   ENDS

        SUBTTL  Initialized Data
        PAGE

CSTACK  SEGMENT STACK
        DB      96 dup (0)              ; 96 more zero bytes of stack
CSTACK  ENDS

        END     SORT                    ; execution starts at SORT
|
|||
|
|