MS-DOS/v2.0/source/SORT.ASM

420 lines
17 KiB
NASM
Raw Normal View History

1983-08-13 01:53:34 +01:00
TITLE SORT FILTER FOR MS-DOS
;
; Sort /R /+n
; /R -> reverse sort
; /+n -> sort on column n
;
; Written by: Chris Peters
;
; Modification History:
; 3-18-83 MZ Fix CR-LF at end of buffer
; Fix small file sorting
; Fix CR-LF line termination bug
; Comment the Damn source
;
; Boolean values for the conditional-assembly switch below.
FALSE EQU 0
TRUE EQU NOT FALSE
; "internat" selects how two records are compared in the inner sort loop:
; true  -> every byte is first mapped through the external "table",
; false -> a plain REPZ CMPSB is used.
;NOTE: "internat" must be false if KANJI version
internat equ true
;NOTE: see above
.xlist
.xcref
INCLUDE DOSSYM.ASM
.cref
.list
; sys - issue a DOS function request.
; name = function number; it is loaded into AH and INT 21h is executed.
; Every other argument register must already be set up by the caller.
sys MACRO name ; system call macro
MOV AH,name
INT 21h
ENDM
; save - push every register named in reglist, in the order listed.
; The matching "restore" must name the same registers in reverse order.
save MACRO reglist ; push those registers
IRP reg,<reglist>
PUSH reg
ENDM
ENDM
; restore - pop every register named in reglist, in the order listed.
; To undo "save <A,B>" write "restore <B,A>".
restore MACRO reglist ; pop those registers
IRP reg,<reglist>
POP reg
ENDM
ENDM
; Layout of the allocated sort block (offsets from its segment base):
; the first MAXREC bytes, starting at SPACE, are a scratch record that is
; compared in place of any line too short to reach the key column; the
; link words and file text start at BUFFER.
MAXREC EQU 256 ; MAXIMUM NUL RECORD SIZE
SPACE EQU 0 ; Offset zero in the allocated block
BUFFER EQU MAXREC ; Offset MAXREC in the allocated block
SUBTTL Segments used in load order
; The empty CODE and CONST declarations and the CSTACK declaration list
; the segments in their load order; all three are combined into group DG.
CODE SEGMENT
CODE ENDS
CONST SEGMENT PUBLIC BYTE
CONST ENDS
CSTACK SEGMENT STACK
DB 128 DUP (0) ; initial stack to be clear
CSTACK ENDS
DG GROUP CODE,CONST,CSTACK
CODE SEGMENT
ASSUME CS:DG,DS:NOTHING,ES:NOTHING,SS:CSTACK
; Program variables are kept in the code segment, ahead of the entry point.
; COLUMN is 0 until a /+n switch is parsed; END_SWITCH then converts it
; into the byte offset of the sort key inside a record.
COLUMN DW 0 ; COLUMN TO USE FOR KEY + 1
; Default switch character; overwritten at OKDOS with the value DOS returns.
SWITCH DB '/'
; Program entry point (named on the END directive at the bottom).
SORT:
;
; check for proper version number of system
;
sys GET_VERSION
XCHG AH,AL ; Turn it around to AH.AL
CMP AX,200H ; Version 2.00 or later is required
JAE OKDOS ; Success
MOV DX,OFFSET DG:BADVER ; Get error message
PUSH CS ; Get DS addressability
POP DS
sys STD_CON_STRING_OUTPUT ; Send to STDOUT
; Leave through a far return to ES:COLUMN. COLUMN still holds its initial
; value of 0 here, so control is transferred to offset 0 of the segment in ES.
; NOTE(review): presumes ES still holds the segment DOS supplied at entry;
; nothing above loads ES explicitly -- confirm.
PUSH ES ; long segment
PUSH COLUMN ; offset zero
LONG_RET PROC FAR
RET ; long return to OS
LONG_RET ENDP
;
; get proper switch character
;
OKDOS:
MOV AL,0 ; Get current switch character
sys CHAR_OPER
MOV SWITCH,DL ; remember it for the switch scanner
;
; parse command line
;
; The byte at offset 80H is taken as the length of the command text and
; the characters after it are scanned with SI as pointer and CX as the
; number of characters left.
; NOTE(review): assumes DS still addresses the segment that holds the
; command line at offset 80H -- DS is not loaded anywhere above.
MOV SI,80H ; pointer to command line
CLD ; go left to right
XOR CX,CX
LODSB
MOV CL,AL ; CX = length of command line
; Scan for the next switch character and act on the character after it.
; The loop has no exit of its own: once the command line is used up,
; GET_CHAR discards its return address and drops into END_SWITCH.
SWITCH_LOOP:
CALL GET_CHAR ; get a character
CMP AL,SWITCH ; beginning of switch?
JNZ SWITCH_LOOP ; No, get next character
CALL GET_CHAR ; get 1st char of switch
CMP AL,'+' ; Column to sort?
JZ SWITCH_NUMBER ; Yes, parse a number
OR AL,20h ; convert to lower case
CMP AL,'r' ; Reverse sort?
JNZ SWITCH_LOOP ; No, get next switch
; Reverse order is obtained by overwriting the opcode byte of the
; conditional jump at CODE_PATCH with 72h (JB).
MOV CS:CODE_PATCH,72h ; sleaze JAE into JB
JMP SWITCH_LOOP ; get next switch
; Parse the decimal number of a /+n switch into COLUMN.
; In:   SI = next command-line character, CX = characters remaining
; Out:  COLUMN = value of the digits read (0 when there were none)
; Uses: AX, BX, DX (DX is clobbered by MUL)
; The character that ends the number is handed back to SWITCH_LOOP rather
; than being discarded, so a switch that follows without a separator
; (for example "/+3/R") is still recognised.
SWITCH_NUMBER:
        MOV     COLUMN,0                ; start off at 0
SWITCH_NEXT_NUMBER:
        CALL    GET_CHAR                ; get supposed digit
        SUB     AL,'0'                  ; convert to number
        JB      SWITCH_UNGET            ; less than '0'
        CMP     AL,9                    ; is it a valid digit?
        JA      SWITCH_UNGET            ; nope, the number is finished
        CBW                             ; make it a full word
        MOV     BX,AX                   ; save digit away
        MOV     AX,10                   ; decimal number system
        MUL     COLUMN                  ; AX = 10 * previous result
        ADD     AX,BX                   ; add in low order digit
        MOV     COLUMN,AX               ; save away value
        JMP     SWITCH_NEXT_NUMBER      ; get next character
SWITCH_UNGET:
        DEC     SI                      ; back up over the terminator ...
        INC     CX                      ; ... and count it as unread again
        JMP     SWITCH_LOOP             ; let the switch scanner examine it
; GET_CHAR - fetch the next command-line character.
; In:   SI = pointer to next character, CX = characters remaining
; Out:  AL = character, SI advanced by one, CX decremented
; At end of line (CX = 0) it does NOT return to its caller: the return
; address is popped into AX and execution falls into END_SWITCH.
GET_CHAR:
JCXZ END_GET ; End of line
DEC CX ; dec char count
LODSB ; get the character
RET ; return
END_GET:
POP AX ; nuke return on stack
;
; Turn the /+n value into a byte offset inside a record. Every record
; begins with a two-byte link word, so column n is found at offset n+1;
; when no column was given (COLUMN = 0) the key starts at offset 2.
;
END_SWITCH:
        INC     COLUMN                  ; n -> n+1
        CMP     COLUMN,1                ; was the value 0 (no column given)?
        JNZ     GOT_COL                 ; no, n+1 is the offset
        INC     COLUMN                  ; yes, key starts right after the link
;
; Get sorting area, no more than 64K
;
; Ask DOS for 1000H paragraphs. When the request fails, BX comes back
; holding the size of the largest free block and the request is repeated
; with that size -- but only if the block can hold the scratch record
; (MAXREC bytes), the first link word (2 bytes) and at least some data.
; A smaller block would make the size arithmetic after GOT_MEM wrap
; around and the fill/read would run past the allocation, so it is
; reported as "not enough memory" instead.
; Out:  AX = segment of the block, BX = its size in paragraphs
GOT_COL:
        MOV     BX,1000H                ; 64K worth of paragraphs
GET_MEM:
        sys     ALLOC                   ; allocate them from somewhere
        JNC     GOT_MEM                 ; got it
        CMP     BX,(MAXREC+2+15)/16     ; is the largest free block usable?
        JAE     GET_MEM                 ; yes, try to allocate that much
        JMP     SIZERR                  ; no, complain
; The allocated block becomes both DS and ES, and its size is converted
; from paragraphs to bytes in BX. A full 1000H paragraphs shifts to 0;
; the 16-bit subtraction that follows this block treats that as 64K.
; In:   AX = segment of the block, BX = size in paragraphs
; Out:  DS = ES = block, BX = size in bytes (mod 64K), scratch record blank
GOT_MEM:
        MOV     DS,AX                   ; Point DS to buffer
        MOV     ES,AX                   ; and point ES to buffer
        MOV     CL,4                    ; 2^4 bytes per paragraph
        SHL     BX,CL                   ; Find out how many bytes we have
;
; clear out temporary record area
;
; The scratch record stands in for lines that are too short to reach the
; key column, so every byte of it must be a blank. The fill word is
; written as 2020H: a one-character literal would be the word 0020H and
; would store a NUL in every second byte.
        MOV     CX,MAXREC/2             ; Size of temporary buffer (words)
        MOV     AX,2020H                ; two blanks per word
        MOV     DI,SPACE                ; Beginning of temp buffer
        REP     STOSW                   ; Blam.
;
; read in file from standard input
;
; Reads repeatedly from handle 0 until DOS returns a count of zero.
; In:   BX = size of the sort block in bytes (mod 64K)
; Out:  DX = one past the last byte read (enters SIZOK)
; DX tracks the next free byte, CX the room that is left. Filling the
; buffer exactly is treated as "file too big".
; A failed read (carry set) leaves an error code, not a byte count, in
; AX; it must not be added to the pointers. No dedicated read-error
; message is available among the externals, so the failure leaves through
; the existing SIZERR exit (error code 1).
        MOV     DX,BUFFER + 2           ; DX = place to begin reading
        MOV     CX,BX                   ; CX is the max number to read
        SUB     CX,MAXREC + 2           ; remember offset of temp buffer
SORTL:
        XOR     BX,BX                   ; Standard input
        sys     READ                    ; Read it in
        JC      SIZERR                  ; read failed, AX is not a count
        ADD     DX,AX                   ; Bump pointer by count read
        SUB     CX,AX                   ; subtract from remaining the count read
        JZ      SIZERR                  ; if buffer is full then error
        OR      AX,AX                   ; no chars read -> end of file
        JNZ     SORTL                   ; there were chars read. go read again
        JMP     SHORT SIZOK             ; trim last ^Z terminated record
; Error exit. SIZERR selects the "not enough memory" message; other
; callers load SI themselves and enter at ERROR_EXIT.
; In (ERROR_EXIT): SI = offset within DG of a message stored as a length
;                  word followed by that many bytes of text.
; The text is written to handle 2 and the program ends with code 1.
SIZERR:
MOV SI,OFFSET DG:ERRMSG ; not enough memory error
ERROR_EXIT:
PUSH CS ; DS addressability
POP DS
LODSW ; get length
MOV CX,AX ; put into appropriate register
MOV DX,SI ; get output destination
MOV BX,2 ; output to standard error
sys WRITE ; and write it out
MOV AL,1 ; return an error code
sys EXIT
;
; Look for a ^Z. Terminate buffer at 1st ^Z.
;
; In:   DX = one past the last byte read
; Out:  BP = BX = end of the text to be sorted; a zero word is stored there
SIZOK:
MOV BX,DX ; save end pointer
MOV CX,DX ; get pointer to end of text
SUB CX,BUFFER+2 ; dif in pointers is count
MOV AL,1AH ; char is ^Z
MOV DI,BUFFER+2 ; point to beginning of text
; NOTE(review): when nothing was read CX is 0 here, REPNZ SCASB performs
; no compare, and the JNZ below tests the flags left by the SUB above
; (ZF set) -- so the "^Z found" path is taken. Confirm this is harmless
; for empty input.
REPNZ SCASB ; find one
JNZ NoBack ; nope, try to find CRLF
DEC BX ; pretend that we didn't see ^Z
NoBack:
SUB BX,CX ; sub from endpointer the number left
; BX now addresses the ^Z when one was found, otherwise the end of data.
SUB BX,2 ; Hope for a CR LF at end
CMP WORD PTR [BX],0A0Dh ; Was there one there?
JZ GOTEND ; yep, here is the end
ADD BX,2 ; nope, bump back to SCASB spot
CMP BYTE PTR [BX],AL ; Was there ^Z there?
JZ GOTEND ; yep, chop it
INC BX ; Nope, skip last char
GOTEND:
MOV BP,BX ; BP = filesize-2(CRLF)+temp buffer+2
; BP-based addressing defaults to SS, hence the explicit DS override.
MOV WORD PTR DS:[BP],0 ; 0 at end of the file
;
; We now turn the entire buffer into a linked list of chains by
; replacing CRLFs with the length of the following line (with 2 for CRLF)
;
; BX = link word of the line being measured, DI = scan pointer,
; BP = end of text. Each pass finds the CR LF that ends the line, stores
; the distance from BX to that CR in the word at BX, and then uses the
; position of the CR as the link word of the next line.
; Out:  the last link word is zero, BP = that link + 2, DI = BUFFER
MOV BX,BUFFER ; pointer to line head (length)
MOV DI,BUFFER+2 ; pointer to line text
REPLACE_LOOP:
MOV AL,13 ; char to look for is CR
MOV CX,BP ; count = end pointer
SUB CX,DI ; chop off start point to get length
INC CX ; add 1???
REPLACE_SCAN:
REPNZ SCASB ; look for CR
JNZ REPLACE_SKIP ; count exhausted
CMP BYTE PTR [DI],10 ; LF there?
JNZ REPLACE_SCAN ; nope, continue scanning
REPLACE_SKIP:
MOV AX,DI ; AX to point after CR
DEC AX ; AX to point to CR
save <AX> ; save pointer
SUB AX,BX ; AX is length of line found
MOV [BX],AX ; stuff it in previous link
restore <BX> ; get pointer to next
INC DI ; skip LF???
; CX is what the scan left over; zero means the whole text was examined.
JCXZ END_REPLACE_LOOP ; no more to scan -> go sort
JMP REPLACE_LOOP ; look for next
END_REPLACE_LOOP:
MOV WORD PTR [BX],0 ; terminate file with nul
LEA BP,[BX+2] ; remember the null line at end
MOV DI,BUFFER ; DI is start of unsorted section
;
; begin sort. Outer loop steps over all unsorted lines
;
; DI = link word of the first line that is not yet in its final place.
; A zero link word marks the end of the list and ends the sort.
OUTER_SORT_LOOP:
MOV BX,DI ; BX is start of unsorted section
MOV SI,BX ; SI is scanning place link
CMP WORD PTR [BX],0 ; are we at the end of the buffer?
JNZ INNER_SORT_LOOP ; No, do inner process
JMP END_OUTER_SORT_LOOP ; yes, go dump out
;
; BX points to best guy found so far. SI walks the remaining unsorted
; lines, and every line that compares better than BX replaces it in BX.
;
INNER_SORT_LOOP:
ADD SI,[SI] ; link to next fellow
MOV AX,[SI] ; get length of comparison guy
OR AX,AX ; test for end of buffer
JZ END_INNER_SORT_LOOP ; if zero then figure out insertion
save <SI,DI> ; save SI,DI
MOV DI,BX ; DI = pointer to tester link
; A line with no characters beyond the key column is compared as if it
; were the scratch record at SPACE, with length MAXREC.
SUB AX,COLUMN ; adjust length for column
JA AXOK ; more chars in tester than column?
MOV SI,SPACE ; point SI to blank area
MOV AX,MAXREC ; make AX be max length
AXOK:
MOV DX,[DI] ; get length of best guy
SUB DX,COLUMN ; adjust length for column
JA DXOK ; there are more chars after column
MOV DI,SPACE ; point DI to the blank area
MOV DX,MAXREC ; really big record
DXOK:
; CX = the smaller of the two key lengths (AX, DX)
MOV CX,AX ; AX is shortest record
CMP AX,DX ; perhaps DX is shorter
JB SMALL ; nope, leave CX alone
MOV CX,DX ; DX is shorter, put length in CX
SMALL:
ADD DI,COLUMN ; offset into record
ADD SI,COLUMN ; offset into other record
if not internat
REPZ CMPSB ; compare every one
endif
if internat
; Same byte-by-byte comparison, except that each byte of both records is
; first translated through "table" (addressed through CS) with XLAT.
push bx
push ax
mov bx,offset dg:table
tloop: lodsb
xlat byte ptr cs:[bx]
mov ah,al
mov al,es:[di]
inc di
xlat byte ptr cs:[bx]
cmp ah,al
loopz tloop
pop ax
pop bx
endif
; The POPs above and in "restore" do not alter the flags, so the result
; of the last compare is still available to the JNZ below.
restore <DI,SI> ; get head pointers back
JNZ TESTED_NOT_EQUAL ; didn't exhaust counter, conditions set
CMP AX,DX ; check string lengths
TESTED_NOT_EQUAL:
;
; note! jae is patched to a jb (opcode 72h) if file is to be sorted in reverse!
;
CODE_PATCH LABEL BYTE
JAE INNER_SORT_LOOP ; if this one wasn't better then go again
MOV BX,SI ; it was better, save header
JMP INNER_SORT_LOOP ; and scan again
; Move the best line (BX) to the head of the unsorted section (DI).
; Done in three block moves: open a gap of DX bytes at DI, copy the line
; into the gap, then close the hole left at the line's old position.
; In:   BX = best line, DI = start of unsorted section, BP = end of list
; Out:  DI = link word of the next unsorted line
END_INNER_SORT_LOOP:
MOV SI,BX ; SI is now the best person
CMP SI,DI ; check best for current
JZ END_INSERT ; best equals current, all done
;
; SI points to best line found so far
; DI points to a place to insert this line
; DI is guaranteed to be < SI
; make room for line at destination
;
MOV DX,[SI] ; get length of line
save <SI,DI> ; save positions of people
; The regions overlap and the data moves upward, so copy from the top down.
; NOTE(review): this move writes as far as BP-1+DX; nothing visible here
; checks that the allocated block has that much room past BP -- confirm.
STD ; go right to left
MOV CX,BP ; get end of file pointer
SUB CX,DI ; get length from destination to end
MOV SI,BP ; start from end
DEC SI ; SI points to end of file
MOV DI,SI ; destination is end of file
ADD DI,DX ; DI points to new end of file
REP MOVSB ; blam. Move every one up
CLD ; back left to right
restore <DI,SI> ; get old source and destination
;
; MOVE NEW LINE INTO PLACE
;
save <DI> ; save destination
; The line itself was shifted up by DX bytes along with everything else.
ADD SI,DX ; adjust for previous movement
save <SI> ; save this value
MOV CX,DX ; get number to move
REP MOVSB ; blam. move the new line in
restore <SI,DI> ; get back destination and new source
;
; DELETE LINE FROM OLD PLACE
;
save <DI> ; save destination
MOV CX,BP ; pointer to end
ADD CX,DX ; remember bump
SUB CX,SI ; get count of bytes to move
INC CX ; turn it into a word
SHR CX,1 ; or a count of words
MOV DI,SI ; new destination of move
ADD SI,DX ; offset of block
REP MOVSW ; blam, squeeze out the space
restore <DI> ; get back original destination
; BP-based addressing defaults to SS, hence the explicit DS override.
MOV WORD PTR DS:[BP-2],0 ; remake the end of file mark
END_INSERT:
ADD DI,[DI] ; link to next guy
JMP OUTER_SORT_LOOP ; and continue
;
; PUT BACK IN THE CR-LF
;
; Walk the chain of link words once more and overwrite each one with
; CR LF. The link at BUFFER itself is only read, not replaced; the walk
; stops after replacing the zero link that ends the list.
END_OUTER_SORT_LOOP:
        MOV     DI,BUFFER               ; DI -> first link word
        MOV     CX,[DI]                 ; CX = distance to the next link
INSERT_LOOP:
        ADD     DI,CX                   ; step to the next link word
        MOV     CX,0A0DH                ; CR LF ...
        XCHG    CX,[DI]                 ; ... swapped in; CX = old link
        OR      CX,CX                   ; was that the terminating zero?
        JNZ     INSERT_LOOP             ; no, keep walking
; Write the sorted text, from BUFFER+2 up to BP, to handle 1 in a single
; request. Ends the program with code 0 when the whole count was written;
; a DOS error or a short write leaves through ERROR_EXIT with ERRMSG2.
WRITE_FILE:
        MOV     BX,1                    ; standard output
        MOV     DX,BUFFER+2             ; first byte of text
        MOV     CX,BP                   ; end of text ...
        SUB     CX,DX                   ; ... minus start = byte count
        sys     WRITE                   ; write 'em out
        JC      BADWRT                  ; DOS reported an error
        CMP     AX,CX                   ; everything written?
        JNZ     BADWRT                  ; no, short write
WRTOK:
        MOV     AL,0                    ; return code 0: success
        sys     EXIT                    ; bye!
BADWRT:
        MOV     SI,OFFSET dg:ERRMSG2    ; strange write error
        JMP     ERROR_EXIT              ; report it and quit
CODE ENDS
; The message texts (and the translation table when "internat" is true)
; are defined in another module; only their names are declared here.
CONST SEGMENT PUBLIC BYTE
EXTRN BADVER:BYTE,ERRMSG:BYTE,ERRMSG2:BYTE
if internat
extrn table:byte
endif
CONST ENDS
SUBTTL Initialized Data
PAGE
; A further 96 zero bytes declared in the CSTACK segment.
CSTACK SEGMENT STACK
DB 96 dup (0)
CSTACK ENDS
; SORT is the program entry point.
END SORT