_STATISTICAL PERFORMANCE ANALYSIS_
by Fred Motteler

[LISTING ONE]

/* patest.c -- A collection of simple routines to test the accuracy of
** statistical performance analysis programs for the PC and ES 1800 emulator.
*/

/* Default delay timing parameters. Each value is the initial setting of the
** matching time_xxxN global below.
*/
#define DELAY_ONE 1
#define DELAY_TWO 2
#define DELAY_THREE 3
#define DELAY_FOUR 4
#define DELAY_FIVE 5
#define DELAY_SIX 6
#define DELAY_SEVEN 7
#define DELAY_EIGHT 8
#define DELAY_NINE 9
#define DELAY_TEN 10
#define DELAY_ELEVEN 11
#define DELAY_TWELVE 12
#define DELAY_THIRTEEN 13
#define DELAY_FOURTEEN 14
#define DELAY_FIFTEEN 15
#define DELAY_SCALE 10 /* Left-shift count applied to each delay value:
                        * multiplies it by 2^10 = 1024 (roughly 1000) */
#define DELAY_LOOPS 5 /* Default number of times through the main loop */

/* Loop delay parameters. These are done as globals to allow easy access to
** the timing parameters via the ES 1800 emulator. This allows different
** timing configurations to be tested without having to recompile and link
** this code. Kludgy, but it encourages easy experimentation.
*/
long final_sumL = 0;    /* Running total of all delay function results */
int time_oneN = DELAY_ONE;
int time_twoN = DELAY_TWO;
int time_threeN = DELAY_THREE;
int time_fourN = DELAY_FOUR;
int time_fiveN = DELAY_FIVE;
int time_sixN = DELAY_SIX;
int time_sevenN = DELAY_SEVEN;
int time_eightN = DELAY_EIGHT;
int time_nineN = DELAY_NINE;
int time_tenN = DELAY_TEN;
int time_elevenN = DELAY_ELEVEN;
int time_twelveN = DELAY_TWELVE;
int time_thirteenN = DELAY_THIRTEEN;
int time_fourteenN = DELAY_FOURTEEN;
int time_fifteenN = DELAY_FIFTEEN;
int scaleN = DELAY_SCALE;       /* Shift count used by every delay function */
int looptoloopN = DELAY_LOOPS;  /* Number of passes made by main() */

/* Function: long delay_xxxx(int delayN)
** Description: These are simple functions designed to allow varied delays.
** The code in each delay function is identical to the code in all of
** the other delay functions. This allows accurate comparison of the
** relative execution time of each function. Fifteen of these functions should
** be a reasonable number to represent a simple "real" program.
*/ long delay_one(delayN) int delayN; { int i; long sumL; sumL = 0L; delayN <<= scaleN; for (i = 0; i < delayN; i++) sumL += (long) i; return(sumL); } long delay_two(delayN) int delayN; { int i; long sumL; sumL = 0L; delayN <<= scaleN; for (i = 0; i < delayN; i++) sumL += (long) i; return(sumL); } . . . . long delay_fifteen(delayN) int delayN; { int i; long sumL; sumL = 0L; delayN <<= scaleN; for (i = 0; i < delayN; i++) sumL += (long) i; return(sumL); } /* Function: void main() ** Description: This is a simple routine to run the various delay routines. ** The delay time variables are all globals to allow experimentation with ** the timing parameters using the ES 1800 emulator. */ void main() { int i; final_sumL = 0L; for (i = 0; i < looptoloopN; i++) { final_sumL += delay_one(time_oneN); final_sumL += delay_two(time_twoN); final_sumL += delay_three(time_threeN); final_sumL += delay_four(time_fourN); final_sumL += delay_five(time_fiveN); final_sumL += delay_six(time_sixN); final_sumL += delay_seven(time_sevenN); final_sumL += delay_eight(time_eightN); final_sumL += delay_nine(time_nineN); final_sumL += delay_ten(time_tenN); final_sumL += delay_eleven(time_elevenN); final_sumL += delay_twelve(time_twelveN); final_sumL += delay_thirteen(time_thirteenN); final_sumL += delay_fourteen(time_fourteenN); final_sumL += delay_fifteen(time_fifteenN); } } [LISTING TWO] TITLE patick - IBM PC / Clone Clock Tick CS:IP Grabber ; File: patick.asm--Fred Motteler and Applied Microsystems Corporation ; Copyright 1990. All Rights Reserved ; Description: ; This file contains three functions: ; C callable: ; void painit(bufferLP, lengthN) Initialize grabber interrupt vector ; int paclose() Close grabber interrupt vector ; Interrupt routine, this is treated like part of painit(): ; patick Grab CS:IP value ; These functions are configured for small model. ; Stack frame structure for painit(): ; stkfr STRUC OLD_FR DW ? ; Previous stack frame pointer RETADDR DW ? 
; Return address to caller BUFFERP DW ? ; Pointer to buffer to use BUFLEN DW ? ; Length of buffer (in longwords) stkfr ENDS ; ; Stack frame structure for clock tick timer routine. intfr STRUC INT_FR DW ? ; Pre-interrupt stack frame pointer IP_VAL DW ? ; Pre-interrupt IP value CS_VAL DW ? ; Pre-interrupt CS value intfr ENDS TIMER EQU 8h ; Timer interrupt vector number DGROUP GROUP _DATA _DATA SEGMENT WORD PUBLIC 'DATA' ASSUME DS:DGROUP bufptr DW 0 ; Starting point of buffer bufsiz DW 0 ; Number of longwords in the buffer bufindx DW 0 ; Next location of buffer to use bufwrap DB 0 ; Flag if buffer has wrapped... _DATA ENDS _TEXT SEGMENT BYTE PUBLIC 'CODE' ASSUME CS:_TEXT ; ; void paopen (unsigned long *bufferLP, int lengthN) ; This a C callable function to initialize the CS:IP grabber and ; start it up. bufferLP points the buffer of where to write CS:IP ; values. lengthN is the length of the buffer in longwords. PUBLIC paopen paopen PROC NEAR push bp mov bp,sp push si push di push es ; ; Set up the local buffer pointer values from those passed on the stack. mov ax,[bp].BUFFERP ; Get pointer to start of buffer mov bufptr,ax mov ax,[bp].BUFLEN ; Get length of the buffer shl ax,1 ; convert longword length to byte length shl ax,1 mov bufsiz,ax xor ax,ax ; Start at the beginning of the buffer mov bufindx,ax mov bufwrap,al ; Reset buffer wrap flag ; ; Save the original clock tick interrupt vector. mov al,TIMER ; interrupt number into al mov ah,35h ; DOS function = get vector int 21h ; DOS returns old vector in es:bx mov cs:oldseg,es ; save old segment mov cs:oldoff,bx ; save old offset ; ; Disable interrupts while changing the interrupt vector. cli ; ; Change clock tick interrupt routine to point at local interrupt routine. 
mov al,TIMER ; vector number mov ah,25h ; DOS function = set vector mov dx,OFFSET patick ; point to our interrupt handler push ds ; don't lose ds, we need to get to local data push cs ; move this cs to ds pop ds ; int 21h ; set the new vector pop ds ; restore ds ; ; Enable interrupts and return; pop es pop di pop si pop bp sti ret ; ; Clock tick grabber routine. This routine samples CS:IP that were pushed ; on to the stack when the interrupt occurs. patick: push bp mov bp,sp ; Treat CS:IP values like stack frame push ax push bx push ds ; ; Get the local ds to allow access to local variables. mov ax,DGROUP mov ds,ax ; ; Use bx as a pointer to the recording buffer mov bx,bufptr add bx,bufindx ; ; Grab the pre-interrupt CS:IP values off the stack mov ax,[bp].IP_VAL ; grab the IP mov [bx],ax ; save the IP in the recording buffer inc bx inc bx mov ax,[bp].CS_VAL ; grab the CS mov [bx],ax ; save the CS in the recording buffer inc bx inc bx ; ; Check if we are at the end of the buffer sub bx,bufptr ; get the byte offset index back again mov ax,bufsiz ; get the buffer byte length cmp ax,bx jne notend ; jump if not at the end of the buffer ; ; At the end of the buffer mov bx,0 ; reset the buffer index mov al,0ffh ; set flag to indicate buffer wrap mov bufwrap,al ; ; Write out modified buffer index notend: mov bufindx,bx ; ; Clean up pop ds pop bx pop ax pop bp ; ; Jump to the original interrupt service routine. An immediate jump ; is used so no segment registers are required. DB 0eah ; jmp immediate, to the offset:segment ; selected below (brute force approach). ; Original interrupt handler's offset and segment values. These are ; in the current code segment to allow the interrupt routine given ; here to directly jump to the original interrupt routine. 
oldoff DW 0 ; Room for original timer interrupt offset oldseg DW 0 ; Room for original timer interrupt segment paopen ENDP ; ; int paclose() This is a C callable function to close CS:IP grabber and ; return the number of CS:IP values grabbed. PUBLIC paclose paclose PROC NEAR push bp mov bp,sp push si push di push es ; ; Disable interrupts while the original interrupt vector is restored. cli mov al,TIMER ; get interrupt number mov ah,25h ; DOS function = set vector push ds ; mov dx,cs:oldoff ; old timer offset mov ds,cs:oldseg ; old timer segment int 21h ; restore old vector pop ds ; ; ; Enable interrupts. sti ; ; Calculate the number of CS:IP values cmp bufwrap,0 ; check if the buffer has wrapped jne wrapped ; jump if it has wrapped mov ax,bufindx ; no wrap, return buffer index as count jmp done wrapped: mov ax,bufsiz ; wrapped, return buffer size as count ; ; Clean up stack and return done: shr ax,1 ; Return count in number of CS:IP pairs shr ax,1 pop es pop di pop si pop bp ret paclose ENDP _TEXT ENDS END [LISTING THREE] /* pawhere.c -- contains a very simple program that returns its segment base ** address. Note that this program is Lattice version 6.01 specific in that ** the Lattice small model has "main" at the beginning of the exectable ** portion of the program. Other compiler/linker packages may require that the ** program map be examined for the module that starts the program. ** Copyright 1990 Fred Motteler and Applied Microsystems Corporation */ #include #include unsigned int main() { FILE *fp; fp = fopen("pawhere.tmp", "w"); fprintf(fp, "%x %x\n", (FP_SEG((char far *) main)), (FP_OFF((char far *) main))); fclose(fp); exit(0); } [LISTING FOUR] /* pamsdos.c -- Utility functions used by MS-DOS version of the statistical ** performance analysis package. 
** Copyright 1990 Fred Motteler and Applied Microsystems Corporation */ #include #include #include #include #include "padef.h" /* Function: int main( argcN, argvAS ) ** Description: MS-DOS based statistical performance analysis program. ** Command line arguments: pamsdos prog.map prog.cfg prog.exe options ** Where: prog.map = memory map for program; prog.cfg = memory map ** configuration; prog.exe = program to run; options = command options ** for the program to run */ int main( argcN, argvAS ) int argcN; char *argvAS[]; { int errorN; /* Error code */ unsigned int segmentW; /* Starting load address of program to run */ unsigned int offsetW; unsigned long originL; int processedN; /* Number of map globals processed */ int i; /* General index */ FILE *mapFP; /* Map file to read */ FILE *formatFP; /* File with map file format information */ char commandAC[PA_LINE_LEN]; /* Complete command line for program */ int pagelinesN; /* Number of lines on output page, 0 if * continuous, -1 if no display output, else * n if n lines per page. */ FILE *listFP; /* Results output file */ char listAB[80]; /* Optional results listing file path/name */ char pagelinesAB[8]; /* String for number of lines/page */ printf("pamsdos - Statistical performance analysis tool for MS-DOS\n"); printf("Version %s\n", PA_VERSION); printf("Copyright (C) 1990 Fred Motteler and Applied Microsystems Corp\n"); if (argcN < 4) { printf("Usage: pamsdos prog.map prog.cfg prog.exe [options]\n"); printf(" Where: prog.map memory map for program\n"); printf(" prog.cfg memory map configuration file\n"); printf(" prog.exe program to run\n"); printf(" [options] command line options for program to run\n"); exit(-100); } /* Determine where the program to run is to be located. */ if ((errorN = pa_locate(&segmentW, &offsetW)) != 0) { pa_error(errorN); exit(errorN); } /* Calculate origin of program. Room must be allowed for memory * malloc()'d off the heap. 
*/ originL = (unsigned long) (segmentW + 1); originL <<= 4; originL += (unsigned long) (offsetW - 2); originL += (unsigned long) (PA_BUFLEN << 2); if ((pa_debugN & PA_GENERAL) != 0) { printf("program start segment:offset %x:%x\n", segmentW, offsetW); printf(" linear address %lx\n",originL); } /* Get the complete command line to invoke the program. */ strcpy(commandAC, argvAS[3]); if (argcN > 4) { for (i = 4; i < argcN; i++) { strcat(commandAC," "); strcat(commandAC,argvAS[i]); } } /* Run the program and collect samples. */ printf("Starting %s\n", argvAS[3]); if ((errorN = pa_pcsample(commandAC, PA_SAMPLE, PA_BUFLEN)) != 0) { pa_error(errorN); exit(errorN); } /* Read in the configuration file to get map format information and * to get number of lines / display page and option listing file. */ if ((formatFP = fopen(argvAS[2], "r")) == (FILE *) NULL) { pa_error(PA_NO_CFG_E); exit(PA_NO_CFG_E); } /* Read in display lines, and optional output file configuration data * from the configuration file. */ if (((errorN = paconfig(formatFP, PA_PAGELINES, pagelinesAB)) != 0) || ((errorN = paconfig(formatFP, PA_LISTFILE, listAB)) != 0)) { pa_error(errorN); fclose(formatFP); exit(errorN); } /* Determine the number of lines/page to display */ if (sscanf(pagelinesAB, "%d", &pagelinesN) != 1) { pa_error(PA_BAD_ARG_E); fclose(formatFP); exit(PA_BAD_ARG_E); } /* Open the optional listing file */ if (listAB[0] == '\0') listFP = (FILE *) NULL; else if ((listFP = fopen(listAB, "w")) == (FILE *) NULL) { pa_error(PA_NO_LST_E); fclose(formatFP); exit(PA_NO_LST_E); } /* Read program's memory map and create "bins" for program counter samples. */ if ((mapFP = fopen(argvAS[1], "r")) == (FILE *) NULL) { pa_error(PA_NO_MAP_E); fclose(mapFP); exit(PA_NO_MAP_E); } if ((errorN = pardmap(mapFP, formatFP, originL, &processedN)) != 0) { pa_error(errorN); fclose(mapFP); fclose(formatFP); exit(errorN); } /* Process the samples and sort the bins according to the PC hits in * each bin. 
*/ printf("Processing samples\n"); if ((errorN = pa_bstuff(PA_SAMPLE, patableAHP, &processedN)) != 0) { pa_error(errorN); fclose(mapFP); fclose(formatFP); exit(errorN); } /* Display the results */ padisply(patableAHP, processedN, pagelinesN, listFP); fclose(mapFP); fclose(formatFP); exit(0); } /* Function: int pa_locate(unsigned int *segmentPW, unsigned int *offsetPW) ** Description: This function figures out where in memory the program to be ** analyzed is to be run. MS-DOS executables are dynamically located at ** runtime. In order to avoid the complexity of writing a DOS ".exe" loader ** program, a simpler approach is used here. This function uses the ANSI ** system() library function to execute a trial program, "pawhere.exe" that ** writes its starting code segment and offset to a temporary file ** "pawhere.tmp". After "pawhere.exe" has finished, this function opens the ** temporary file and reads the starting segment and offset value. It is ** assumed that the desired program to be tested will have the same starting ** code segment and offset. If all operations were successful, then 0 is ** returned. Otherwise a non-zero error code will be returned.*/ int pa_locate(segmentPW, offsetPW) unsigned int *segmentPW; unsigned int *offsetPW; { FILE *fp; /* First figure out where the program will be loaded. Run "pawhere.exe" * via a system() function call. */ if ((system("pawhere")) != 0) return(PA_NO_WHERE_E); /* Read in the result of whereami.tmp. */ if ((fp = fopen("pawhere.tmp", "r")) == (FILE *) NULL) return(PA_NO_TMP_E); if ((fscanf(fp, "%x %x", segmentPW, offsetPW)) != 2) return(PA_BAD_TMP_E); fclose(fp); if (remove("pawhere.tmp") != 0) return(PA_TMP_RM_E); return(0); } /* Function: int pa_pcsample(char *programS, char *sampfileS, int samplesN) ** Description: This function runs the program (entire command line) pointed ** to by programS, while sampling its program counter every PC clock tick. 
** Up to samplesN program counter samples are collected, and then written
** out in binary format to the file sampfileS.
** Returns 0 on success. Otherwise returns PA_NO_MEM_E (no memory for the
** sample buffer), PA_NO_EXEC_E (system() reported failure), PA_NO_PC_FILE_E
** (sample file could not be opened) or PA_NO_PC_WR_E (short or failed write).
** The sample buffer is released on every return path.
*/
int pa_pcsample(programS, sampfileS, samplesN)
char *programS;         /* Command line of program to run */
char *sampfileS;        /* File to use to write out pc samples */
int samplesN;           /* Maximum number of samples to collect */
{
    unsigned long *pcbufferPL;  /* Long pointer to local pc sample buffer */
    unsigned int *pcbufferPW;   /* Word pointer to local pc sample buffer */
    unsigned long *pcorgPL;     /* Original copy of pointer to pc sample buf */
    unsigned int segmentW;      /* Segment part of the current sample */
    unsigned int offsetW;       /* Offset part of the current sample */
    int handleN;                /* pc sample file handle */
    unsigned long sampleL;      /* segment:offset sample converted to linear */
    int i;                      /* general index */

    /* Grab memory for the sample buffer */
    if ((pcbufferPL = (unsigned long *) malloc((4*samplesN)))
        == (unsigned long *) NULL)
        return(PA_NO_MEM_E);

    /* Copy buffer pointer to allow word (int) access as well as long
     * access. */
    pcbufferPW = (unsigned int *) pcbufferPL;
    pcorgPL = pcbufferPL;

    /* Start CS:IP sampling */
    paopen(pcbufferPW, samplesN);

    /* Run the desired program. */
    if (system(programS) != 0)
    {
        paclose();
        free(pcorgPL);          /* do not leak the sample buffer */
        return(PA_NO_EXEC_E);
    }

    /* Stop sampling */
    samplesN = paclose();

    /* Convert the samples from segment:offset to linear addresses.
     * The conversion is done in place: two words are read through
     * pcbufferPW and one long is written back through pcbufferPL.
     * NOTE(review): this assumes two unsigned ints occupy exactly one
     * unsigned long (16-bit int, 32-bit long) -- confirm for the target
     * compiler. */
    if ((pa_debugN & PA_GENERAL) != 0)
        printf("pa_pcsample: number of samples: %d\n", samplesN);
    for (i = 0; i < samplesN; i++)
    {
        /* Read segment:offset value from the table. The offset (IP) word
         * comes first, followed by the segment (CS) word. */
        offsetW = *pcbufferPW++;
        segmentW = *pcbufferPW++;
        if ((pa_debugN & PA_GENERAL) != 0)
            printf("pa_pcsample: sample segment:offset %x:%x\n",
                   segmentW,offsetW);

        /* Convert it to a linear address. */
        sampleL = ((unsigned long) offsetW)
                  + (((unsigned long) segmentW) << 4);

        /* Write the linear address back to the table. */
        *pcbufferPL++ = sampleL;
        if ((pa_debugN & PA_GENERAL) != 0)
            printf("pa_pcsample: linear sample %lx\n",sampleL);
    }

    /* Write the samples to a binary file.
     * NOTE(review): the file is opened without O_TRUNC, so if sampfileS
     * already exists and is longer than this run's data the old tail is
     * presumably left in place -- confirm whether that is intended. */
    if ((handleN = open (sampfileS, (O_CREAT | O_WRONLY | O_RAW), 0))
        == (-1))
    {
        free(pcorgPL);
        return(PA_NO_PC_FILE_E);
    }
    if ((write( handleN, ((char *) pcorgPL), (samplesN << 2)))
        != (samplesN << 2))
    {
        close(handleN);
        free(pcorgPL);
        return(PA_NO_PC_WR_E);
    }
    close(handleN);
    free(pcorgPL);
    return(0);
}