MS-DOS/v2.0/source/SORT.ASM

420 lines
17 KiB
NASM
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

TITLE SORT FILTER FOR MS-DOS
;
; Sort /R /+n
; /R -> reverse sort
; /+n -> sort on column n
;
; Written by: Chris Peters
;
; Modification History:
; 3-18-83 MZ Fix CR-LF at end of buffer
; Fix small file sorting
; Fix CR-LF line termination bug
; Comment the Damn source
;
; Boolean constants used by the conditional-assembly flag below.
FALSE EQU 0
TRUE EQU NOT FALSE
;NOTE: "internat" must be false if KANJI version
; When true, the key comparison translates every byte through the external
; table "table" (XLAT) before comparing; when false a plain REPZ CMPSB is
; assembled instead.
internat equ true
;NOTE: see above
; Listing and cross-reference output are switched off while the include
; file is read and switched back on afterwards.
; DOSSYM.ASM presumably supplies the system-call names used with the sys
; macro (GET_VERSION, ALLOC, READ, ...) -- it is not visible from here.
.xlist
.xcref
INCLUDE DOSSYM.ASM
.cref
.list
; sys - issue an MS-DOS system call.
;   argument: the function number; it is loaded into AH and INT 21h is
;   executed. Every other register must be set up by the caller.
sys     MACRO   func
        MOV     AH,func
        INT     21h
        ENDM
; save - push a list of registers, in the order they are written.
;   argument: angle-bracketed register list, for example  save <SI,DI>
save    MACRO   regs
        IRP     item,<regs>
        PUSH    item
        ENDM
        ENDM
; restore - pop a list of registers, in the order they are written.
;   argument: angle-bracketed register list. To undo  save <SI,DI>  the
;   list has to be given reversed:  restore <DI,SI>
restore MACRO   regs
        IRP     item,<regs>
        POP     item
        ENDM
        ENDM
; Layout of the allocated sort block (offsets from the start of the block):
;   SPACE    .. SPACE+MAXREC-1   blank-filled temporary record, compared
;                                against when a line has no text at the key
;                                column
;   BUFFER                       link word of the first line
;   BUFFER+2 ..                  text read from standard input
MAXREC EQU 256 ; MAXIMUM NUL RECORD SIZE
SPACE EQU 0 ; Offset zero in the allocated block
BUFFER EQU MAXREC ; Offset MAXREC in the allocated block
SUBTTL Segments used in load order
; The segments are named here once, in the order CODE, CONST, CSTACK, and
; are reopened further down the file to receive their contents. CODE and
; CONST are empty at this point; CSTACK already reserves 128 zeroed bytes.
; All three are combined into the group DG, which is what the OFFSET DG:
; expressions in the code refer to.
CODE SEGMENT
CODE ENDS
CONST SEGMENT PUBLIC BYTE
CONST ENDS
CSTACK SEGMENT STACK
DB 128 DUP (0) ; initial stack to be clear
CSTACK ENDS
DG GROUP CODE,CONST,CSTACK
CODE SEGMENT
ASSUME CS:DG,DS:NOTHING,ES:NOTHING,SS:CSTACK
; Variables kept inside the code segment.
; COLUMN first receives the number parsed from /+n (0 when the switch is
; absent); END_SWITCH then turns it into the offset of the key measured
; from a line's link word.
COLUMN DW 0 ; COLUMN TO USE FOR KEY + 1
; Switch character; the '/' here is replaced at OKDOS by the value the
; CHAR_OPER call returns in DL.
SWITCH DB '/'
; Program entry point (SORT is the label named on the END directive).
SORT:
;
; check for proper version number of system
;
sys GET_VERSION
XCHG AH,AL ; Turn it around to AH.AL
CMP AX,200H ; Version 2.00 or later is required
JAE OKDOS ; Success
MOV DX,OFFSET DG:BADVER ; Get error message
PUSH CS ; Get DS addressability
POP DS
sys STD_CON_STRING_OUTPUT ; Send to STDOUT
; Leave through a far return to ES:0000. COLUMN still holds its initial
; value of 0 at this point, so pushing it supplies the zero offset.
; NOTE(review): this assumes ES is unchanged since program entry and that
; ES:0000 is a valid place to return to -- confirm against the DOS
; program-load conventions.
PUSH ES ; long segment
PUSH COLUMN ; offset zero
LONG_RET PROC FAR
RET ; long return to OS
LONG_RET ENDP
;
; get proper switch character
;
OKDOS:
        MOV     AL,0                    ; Get current switch character
        sys     CHAR_OPER
        MOV     SWITCH,DL               ; remember it in place of the default
;
; parse command line
;
; The command tail is read from DS:80h: one length byte followed by the
; text. Two switches are recognised, everything else is skipped:
;   <switch>R or <switch>r   patch the compare at CODE_PATCH (reverse sort)
;   <switch>+digits          COLUMN = decimal value of the digits
; The loop has no exit of its own. GET_CHAR ends it when the tail is used
; up: it discards its return address and runs on into END_SWITCH.
;
        MOV     SI,80H                  ; pointer to command line
        CLD                             ; go left to right
        XOR     CX,CX
        LODSB
        MOV     CL,AL                   ; CX = length of command line
SWITCH_LOOP:
        CALL    GET_CHAR                ; get a character
        CMP     AL,SWITCH               ; beginning of switch?
        JNZ     SWITCH_LOOP             ; No, get next character
        CALL    GET_CHAR                ; get 1st char of switch
        CMP     AL,'+'                  ; Column to sort?
        JZ      SWITCH_NUMBER           ; Yes, parse a number
        OR      AL,20h                  ; convert to lower case
        CMP     AL,'r'                  ; Reverse sort?
        JNZ     SWITCH_LOOP             ; No, get next switch
        MOV     CS:CODE_PATCH,72h       ; sleaze JAE into JB
        JMP     SWITCH_LOOP             ; get next switch
SWITCH_NUMBER:
        MOV     COLUMN,0                ; start off at 0
SWITCH_NEXT_NUMBER:
        CALL    GET_CHAR                ; get supposed digit
        SUB     AL,'0'                  ; convert to number
        JB      SWITCH_UNGET            ; less than '0': number is finished
        CMP     AL,9                    ; is it a valid digit?
        JA      SWITCH_UNGET            ; nope: number is finished
        CBW                             ; make it a full word
        MOV     BX,AX                   ; save byte away
        MOV     AX,10                   ; decimal number system
        MUL     COLUMN                  ; take previous result (clobbers DX)
        ADD     AX,BX                   ; add in low order digit
        MOV     COLUMN,AX               ; save away value
        JMP     SWITCH_NEXT_NUMBER      ; get next character
;
; The character that ended the number has only been tested as a digit. Put
; it back so SWITCH_LOOP can test it as a switch character; otherwise the
; second switch of "/+3/R" would be swallowed and ignored.
;
SWITCH_UNGET:
        DEC     SI                      ; step back onto that character
        INC     CX                      ; and count it as unread again
        JMP     SWITCH_LOOP             ; get next switch
;
; GET_CHAR - fetch the next character of the command line.
;   In:   DS:SI -> next character, CX = characters still unread
;   Out:  AL = the character, SI advanced by one, CX reduced by one
;   With CX already zero it does not return to its caller: the return
;   address is popped off (into AX) and execution continues with whatever
;   follows END_GET.
;
GET_CHAR:
        JCXZ    END_GET                 ; nothing left on the line
        LODSB                           ; AL = next character
        DEC     CX                      ; one fewer left to read
        RET
END_GET:
        POP     AX                      ; throw the return address away
;
; set up column for proper sort offset
;
; COLUMN arrives holding the number from /+n, or 0 when no column was
; given. It leaves as the key's offset from a line's link word:
;   0 -> 2      (first text byte, directly after the 2-byte link word)
;   n -> n+1
END_SWITCH:
        INC     COLUMN                  ; n+1 is already right unless n was 0
        CMP     COLUMN,1                ; was the column left at 0?
        JNZ     GOT_COL                 ; no, a column was given
        INC     COLUMN                  ; default: key starts at offset 2
;
; Get sorting area, no more than 64K
;
; Ask for 64K. When the request is refused, retry with the paragraph count
; DOS handed back in BX; give up only when that count is zero.
GOT_COL:
        MOV     BX,1000H                ; 1000h paragraphs = 64K bytes
GET_MEM:
        sys     ALLOC                   ; request BX paragraphs
        JNC     GOT_MEM                 ; granted, AX is the block's segment
        TEST    BX,BX                   ; refused: anything available at all?
        JNZ     GET_MEM                 ; yes, settle for that much
        JMP     SIZERR                  ; no memory whatsoever, complain
;
; GOT_MEM - set up the freshly allocated sort block.
;   In:   AX = segment of the block, BX = its size in paragraphs
;   Out:  DS = ES = block segment
;         temporary record at SPACE filled with MAXREC blanks
;         DX = BUFFER+2  (where the first READ will store)
;         CX = number of bytes that may be read
;   A block too small to hold the temporary record and the first link word
;   is rejected through SIZERR.
;
GOT_MEM:
        CMP     BX,(MAXREC+2+15)/16     ; at least MAXREC+2 bytes granted?
        JB      SIZERR                  ; no: the fill and the read below
                                        ; would run past the block
        MOV     DS,AX                   ; Point DS to buffer
        MOV     ES,AX                   ; and point ES to buffer
        MOV     CL,4                    ; 2^4 bytes per paragraph
        SHL     BX,CL                   ; Find out how many bytes we have
                                        ; (a full 64K wraps to 0; the SUB
                                        ; below still gives 64K-(MAXREC+2))
;
; clear out temporary record area
;
        MOV     CX,MAXREC/2             ; Size of temporary buffer (words)
        MOV     AX,2020H                ; a blank in BOTH bytes of the word
        MOV     DI,SPACE                ; Beginning of temp buffer
        REP     STOSW                   ; Blam.
;
; read in file from standard input
;
        MOV     DX,BUFFER + 2           ; DX = place to begin reading
        MOV     CX,BX                   ; CX is the max number to read
        SUB     CX,MAXREC + 2           ; remember offset of temp buffer
; Read standard input into the buffer until a READ returns zero bytes.
;   DX = where the next bytes are stored, CX = room left in the buffer
; Running out of room (CX reaching exactly zero) is reported as the
; "not enough memory" error.
; NOTE(review): the carry flag is not examined after READ, although
; WRITE_FILE does examine it after WRITE -- confirm whether a failed READ
; needs handling here.
SORTL:
XOR BX,BX ; Standard input
sys READ ; Read it in
ADD DX,AX ; Bump pointer by count read
SUB CX,AX ; subtract from remaining the count read
JZ SIZERR ; if buffer is full then error
OR AX,AX ; no chars read -> end of file
JNZ SORTL ; there were chars read. go read again
JMP SHORT SIZOK ; trim last ^Z terminated record
;
; SIZERR     - entry for the "not enough memory" failure; selects ERRMSG
;              and falls into ERROR_EXIT.
; ERROR_EXIT - In: SI = offset (within DG) of a message stored as a length
;              word followed by that many bytes of text.
;              Writes the text to handle 2 and terminates with code 1.
;
SIZERR:
        MOV     SI,OFFSET DG:ERRMSG     ; not enough memory error
ERROR_EXIT:
        PUSH    CS
        POP     DS                      ; DS = CS so the message can be read
        MOV     BX,2                    ; handle 2: standard error
        LODSW                           ; AX = length word, SI now at text
        MOV     DX,SI                   ; DS:DX = text to write
        MOV     CX,AX                   ; CX = number of bytes to write
        sys     WRITE
        MOV     AL,1                    ; return an error code
        sys     EXIT
;
; Look for a ^Z. Terminate buffer at 1st ^Z.
;
; Decide where the text to be sorted ends.
;   In:  DX = address one past the last byte read
;   Out: BP = BX = end of text, with a zero word stored at that address
; The text is cut at the first ^Z if there is one. When a CR-LF pair sits
; directly in front of the cut, the cut is moved back onto the CR.
SIZOK:
MOV BX,DX ; save end pointer
MOV CX,DX ; get pointer to end of text
SUB CX,BUFFER+2 ; dif in pointers is count
MOV AL,1AH ; char is ^Z
MOV DI,BUFFER+2 ; point to beginning of text
REPNZ SCASB ; find one
JNZ NoBack ; nope, try to find CRLF
DEC BX ; pretend that we didn't see ^Z
NoBack:
SUB BX,CX ; sub from endpointer the number left
; BX is now the address of the ^Z that stopped the scan, or the end pointer
; itself when the scan ran out without a match.
SUB BX,2 ; Hope for a CR LF at end
CMP WORD PTR [BX],0A0Dh ; Was there one there?
JZ GOTEND ; yep, here is the end
ADD BX,2 ; nope, bump back to SCASB spot
CMP BYTE PTR [BX],AL ; Was there ^Z there?
JZ GOTEND ; yep, chop it
INC BX ; Nope, skip last char
GOTEND:
MOV BP,BX ; BP = filesize-2(CRLF)+temp buffer+2
; The explicit DS: is required: a BP-based address would otherwise be
; taken relative to SS.
MOV WORD PTR DS:[BP],0 ; 0 at end of the file
;
; We now turn the entire buffer into a linked list of chains by
; replacing CRLFs with the length of the following line (with 2 for CRLF)
;
; Each line becomes a link word followed by its text. The link word holds
; the distance from itself to the next link word. The first link word is
; at BUFFER; every later one overwrites the CR-LF that ended the preceding
; line.
;   BX = link word currently being filled in
;   DI = scan pointer into the text
;   BP = end of text (a zero word was stored there by the code above)
MOV BX,BUFFER ; pointer to line head (length)
MOV DI,BUFFER+2 ; pointer to line text
REPLACE_LOOP:
MOV AL,13 ; char to look for is CR
MOV CX,BP ; count = end pointer
SUB CX,DI ; chop off start point to get length
INC CX ; +1: the byte at BP is scanned too, so that when no CR is left DI-1 lands exactly on BP
REPLACE_SCAN:
REPNZ SCASB ; look for CR
JNZ REPLACE_SKIP ; count exhausted
CMP BYTE PTR [DI],10 ; LF there?
JNZ REPLACE_SCAN ; nope, continue scanning
REPLACE_SKIP:
MOV AX,DI ; AX to point after CR
DEC AX ; AX to point to CR
save <AX> ; save pointer
SUB AX,BX ; AX is length of line found
MOV [BX],AX ; stuff it in previous link
restore <BX> ; get pointer to next (the saved CR address becomes the new link word)
INC DI ; step over the LF: text of the next line starts 2 bytes past its link word
JCXZ END_REPLACE_LOOP ; no more to scan -> go sort
JMP REPLACE_LOOP ; look for next
END_REPLACE_LOOP:
MOV WORD PTR [BX],0 ; terminate file with nul
LEA BP,[BX+2] ; remember the null line at end
MOV DI,BUFFER ; DI is start of unsorted section
;
; begin sort. Outer loop steps over all unsorted lines
;
; Selection pass. DI marks the first line that is not yet in its final
; place. The inner loop walks the links from there to the zero terminator
; and leaves BX on the line that should come first; the code at
; END_INNER_SORT_LOOP then moves that line to DI.
OUTER_SORT_LOOP:
MOV BX,DI ; BX is start of unsorted section
MOV SI,BX ; SI is scanning place link
CMP WORD PTR [BX],0 ; are we at the end of the buffer?
JNZ INNER_SORT_LOOP ; No, do inner process
JMP END_OUTER_SORT_LOOP ; yes, go dump out
;
; BX points to best guy found so far. SI steps along the links of the
; remaining lines; each one is compared with the line at BX and replaces it
; as "best" when the patched branch at CODE_PATCH falls through.
;
INNER_SORT_LOOP:
ADD SI,[SI] ; link to next fellow
MOV AX,[SI] ; get length of comparison guy
OR AX,AX ; test for end of buffer
JZ END_INNER_SORT_LOOP ; if zero then figure out insertion
save <SI,DI> ; save SI,DI
MOV DI,BX ; DI = pointer to tester link
; A line whose length does not reach past the key column is compared as if
; it were the blank temporary record at SPACE, with length MAXREC.
SUB AX,COLUMN ; adjust length for column
JA AXOK ; more chars in tester than column?
MOV SI,SPACE ; point SI to blank area
MOV AX,MAXREC ; make AX be max length
AXOK:
MOV DX,[DI] ; get length of best guy
SUB DX,COLUMN ; adjust length for column
JA DXOK ; there are more chars after column
MOV DI,SPACE ; point DI to the blank area
MOV DX,MAXREC ; really big record
DXOK:
; CX = the smaller of the two key lengths = number of bytes to compare.
MOV CX,AX ; AX is shortest record
CMP AX,DX ; perhaps DX is shorter
JB SMALL ; nope, leave CX alone
MOV CX,DX ; DX is shorter, put length in CX
SMALL:
ADD DI,COLUMN ; offset into record
ADD SI,COLUMN ; offset into other record
if not internat
REPZ CMPSB ; compare every one
endif
if internat
; Same compare, but both bytes are first translated through "table".
; AX and BX are preserved around it; PUSH/POP leave the flags of the last
; CMP intact for the branch below.
push bx
push ax
mov bx,offset dg:table
tloop: lodsb ; byte of the line at SI
xlat byte ptr cs:[bx] ; translate it
mov ah,al
mov al,es:[di] ; byte of the line at DI
inc di
xlat byte ptr cs:[bx] ; translate it
cmp ah,al
loopz tloop ; until a difference or CX bytes done
pop ax
pop bx
endif
restore <DI,SI> ; get head pointers back
JNZ TESTED_NOT_EQUAL ; didn't exhaust counter, conditions set
CMP AX,DX ; check string lengths
TESTED_NOT_EQUAL:
;
; note! jae is patched to a jb if file is to be sorted in reverse!
; (the reverse-switch handler stores 72h over the byte at CODE_PATCH)
;
CODE_PATCH LABEL BYTE
JAE INNER_SORT_LOOP ; if this one wasn't better then go again
MOV BX,SI ; it was better, save header
JMP INNER_SORT_LOOP ; and scan again
; Move the best line (at BX) in front of the remaining lines (at DI).
; Done in three block moves: open a gap of the line's length at DI, copy
; the line into the gap, then close the hole left at its old position.
;   BP = address just past the zero word that terminates the chain
END_INNER_SORT_LOOP:
MOV SI,BX ; SI is now the best person
CMP SI,DI ; check best for current
JZ END_INSERT ; best equals current, all done
;
; SI points to best line found so far
; DI points to a place to insert this line
; DI is guaranteed to be < SI
; make room for line at destination
;
MOV DX,[SI] ; get length of line
save <SI,DI> ; save positions of people
; The move overlaps and goes upward in memory, so it is done backward,
; from the last byte, with the direction flag set.
STD ; go right to left
MOV CX,BP ; get end of file pointer
SUB CX,DI ; get length from destination to end
MOV SI,BP ; start from end
DEC SI ; SI points to end of file
MOV DI,SI ; destination is end of file
ADD DI,DX ; DI points to new end of file
REP MOVSB ; blam. Move every one up
CLD ; back left to right
restore <DI,SI> ; get old source and destination
;
; MOVE NEW LINE INTO PLACE
;
save <DI> ; save destination
ADD SI,DX ; adjust for previous movement
save <SI> ; save this value
MOV CX,DX ; get number to move
REP MOVSB ; blam. move the new line in
restore <SI,DI> ; get back destination and new source
;
; DELETE LINE FROM OLD PLACE
;
save <DI> ; save destination
MOV CX,BP ; pointer to end
ADD CX,DX ; remember bump
SUB CX,SI ; get count of bytes to move
INC CX ; round up so an odd byte count is still covered
SHR CX,1 ; or a count of words
MOV DI,SI ; new destination of move
ADD SI,DX ; offset of block
REP MOVSW ; blam, squeeze out the space
restore <DI> ; get back original destination
MOV WORD PTR DS:[BP-2],0 ; remake the end of file mark
; The line now at DI is in its final place; step DI over it.
END_INSERT:
ADD DI,[DI] ; link to next guy
JMP OUTER_SORT_LOOP ; and continue
;
; PUT BACK IN THE CR-LF
;
; Walk the chain one last time and overwrite every link word after the
; first with CR,LF. The zero word that terminates the chain is overwritten
; as well, which also ends the walk.
END_OUTER_SORT_LOOP:
        MOV     DI,BUFFER               ; first link word
        MOV     CX,[DI]                 ; distance to the second one
INSERT_LOOP:
        ADD     DI,CX                   ; step to the next link word
        MOV     CX,[DI]                 ; pick up its value before it is lost
        MOV     WORD PTR [DI],0A0DH     ; store CR,LF in its place
        TEST    CX,CX                   ; was that the zero terminator?
        JNZ     INSERT_LOOP             ; no, keep walking
;
; WRITE_FILE - send the sorted text, from BUFFER+2 up to BP, to handle 1
; with a single WRITE call.
;   Carry set, or fewer bytes written than requested: report ERRMSG2
;   through ERROR_EXIT.
;   Otherwise: terminate with return code 0.
;
WRITE_FILE:
        MOV     BX,1                    ; handle 1: standard output
        MOV     DX,BUFFER+2             ; first byte of text
        MOV     CX,BP                   ; pointer to end of buffer
        SUB     CX,DX                   ; CX = number of bytes to write
        sys     WRITE
        JC      BADWRT                  ; the call itself failed
        CMP     AX,CX                   ; was everything written?
        JNZ     BADWRT                  ; no, short write
WRTOK:
        XOR     AL,AL                   ; return code 0
        sys     EXIT
BADWRT:
        MOV     SI,OFFSET dg:ERRMSG2    ; strange write error
        JMP     ERROR_EXIT
CODE ENDS
; The three messages, and the translation table when internat is true, are
; defined in another module. They are declared inside CONST, which is part
; of group DG, matching the OFFSET DG: references in the code.
CONST SEGMENT PUBLIC BYTE
EXTRN BADVER:BYTE,ERRMSG:BYTE,ERRMSG2:BYTE
if internat
extrn table:byte
endif
CONST ENDS
SUBTTL Initialized Data
PAGE
; A further 96 bytes of stack, in addition to the 128 bytes declared with
; the segment ordering near the top of the file.
CSTACK SEGMENT STACK
DB 96 dup (0)
CSTACK ENDS
; SORT is the program's entry point.
END SORT