From b732e8caee6e14dae935abcde353a3ecf838f72d Mon Sep 17 00:00:00 2001 From: hollings Date: Tue, 5 Jul 1994 03:25:09 +0000 Subject: [PATCH] obsereved cost model. --- rtinst/src/RTfuncs.c | 101 +++++++++++++++- rtinst/src/RTsparc.c | 317 ++++++++++++++++++++++++++++++++++++++++++++++++++- rtinst/src/RTunix.c | 93 ++++++++++++--- 3 files changed, 485 insertions(+), 26 deletions(-) diff --git a/rtinst/src/RTfuncs.c b/rtinst/src/RTfuncs.c index a7ceee9..825f46c 100644 --- a/rtinst/src/RTfuncs.c +++ b/rtinst/src/RTfuncs.c @@ -3,7 +3,10 @@ * functions for a SUNOS SPARC processor. * * $Log: RTfuncs.c,v $ - * Revision 1.6 1994/02/02 00:46:11 hollings + * Revision 1.7 1994/07/05 03:25:09 hollings + * obsereved cost model. + * + * Revision 1.6 1994/02/02 00:46:11 hollings * Changes to make it compile with the new tree. * * Revision 1.5 1993/12/13 19:47:29 hollings @@ -23,6 +26,7 @@ * * */ +#include #include #include @@ -45,6 +49,13 @@ char DYNINSTdata[SYN_INST_BUF_SIZE]; char DYNINSTglobalData[SYN_INST_BUF_SIZE]; int DYNINSTnumSampled; int DYNINSTnumReported; +int DYNINSTtotalAlaramExpires; + +/* + * for now costCount is in cycles. + */ +float DYNINSTcyclesToUsec = 1/66.0; +extern time64 DYNINSTtotalSampleTime; void DYNINSTreportCounter(intCounter *counter) { @@ -88,14 +99,95 @@ volatile int DYNINSTsampleMultiple = 1; * timers and counters. The code to do the sampling is added as func * entry dynamic instrumentation. * + * It also reports the current value of the observed cost. + * */ void DYNINSTsampleValues() { DYNINSTnumReported++; } +#define FOUR_BILLION (((double) 1.0) * 1024 * 1024 * 1024 * 4) + +/* + * Define a union to let us get at the bits of a 64 bit integer. + * + * This is needed since gcc doesn't support 64 bit ints fully. + * + * Think at least twice before changing this. jkh 7/2/94 + */ +union timeUnion { + unsigned int array[2]; + int64 i64; +}; + +/* + * Return the observed cost of instrumentation in machine cycles. 
+ * + */ +int64 DYNINSTgetObservedCycles() +{ + static int64 previous; + static union timeUnion value; + register unsigned int lowBits asm("%g7"); + + value.array[1] = lowBits; + if (value.i64 < previous) { + /* add to high word + * + ************************** WARNING *************************** + * this assumes we sample frequently enough to catch these * + ************************************************************** + */ + fprintf(stderr, "current %f, previous %f\n", ((double) value.i64), + ((double) previous)); + fprintf(stderr, "Warning observed cost register wrapped\n"); + value.array[0] += 1; + + fflush(stderr); + } + previous = value.i64; + return(value.i64); +} + +void DYNINSTreportCost(intCounter *counter) +{ + /* + * This should eventually be replaced by the normal code to report + * a mapped counter??? + */ + + double cost; + int64 value; + static double prevCost; + traceSample sample; + + value = DYNINSTgetObservedCycles(); + cost = ((double) value) * (DYNINSTcyclesToUsec / 1000000.0); + + if (cost < prevCost) { + fprintf(stderr, "Fatal Error Cost counter went backwards\n"); + fflush(stderr); + sigpause(0xffff); + } + + prevCost = cost; + + sample.value = cost; + sample.id = counter->id; + + DYNINSTgenerateTraceRecord(0, TR_SAMPLE, sizeof(sample), &sample); +} + +/* + * Call this function to generate a sample when needed. + * The exception is program exit, for which DYNINSTsampleValues should + * be called directly!!!
+ * + */ void DYNINSTalarmExpire() { + time64 start, end; static int inSample; /* should use atomic test and set for this */ @@ -104,8 +196,13 @@ void DYNINSTalarmExpire() inSample = 1; /* only sample every DYNINSTsampleMultiple calls */ - if ((++DYNINSTnumSampled % DYNINSTsampleMultiple) == 0) + DYNINSTtotalAlaramExpires++; + if ((++DYNINSTnumSampled % DYNINSTsampleMultiple) == 0) { + start = DYNINSTgetCPUtime(); DYNINSTsampleValues(); + end = DYNINSTgetCPUtime(); + DYNINSTtotalSampleTime += end - start; + } inSample = 0; } diff --git a/rtinst/src/RTsparc.c b/rtinst/src/RTsparc.c index 7dd9846..8e13638 100644 --- a/rtinst/src/RTsparc.c +++ b/rtinst/src/RTsparc.c @@ -4,7 +4,10 @@ * functions for a normal Sparc with SUNOS. * * $Log: RTsparc.c,v $ - * Revision 1.3 1994/02/02 00:46:12 hollings + * Revision 1.4 1994/07/05 03:25:10 hollings + * obsereved cost model. + * + * Revision 1.3 1994/02/02 00:46:12 hollings * Changes to make it compile with the new tree. * * Revision 1.2 1993/12/13 19:47:52 hollings @@ -31,6 +34,8 @@ #include #include +#include "../h/rtinst.h" + int DYNINSTmappedUarea; int *_p_1, *_p_2; static int _kmem = -1; @@ -50,22 +55,36 @@ caddr_t DYNINSTprobeUarea() struct proc *p; kd = kvm_open(NULL, NULL, NULL, O_RDONLY, NULL); - if (!kd) return(0); + if (!kd) { + perror("kvm_open"); + return(0); + } pid = getpid(); p = kvm_getproc(kd, pid); - if (!p) return(0); + if (!p) { + perror("kvm_getproc"); /* report the failure before bailing out */ + return(0); + } u = kvm_getu(kd, p); - if (!u) return(0); + if (!u) { + perror("kvm_getu"); + return(0); + } kvm_getcmd(kd, p, u, &args, NULL); - if (cmd = (char *) rindex(args[0], '/')) { + cmd = (char *) rindex(args[0], '/'); + if (cmd) { cmd++; } else { cmd = args[0]; } +#ifdef notdef if (strcmp(cmd, u->u_comm)) { + printf("cmd = %s, u_comm = %s\n", cmd, u->u_comm); + perror("no cmd"); return(0); } +#endif kvm_close(kd); return((caddr_t)p->p_uarea); @@ -83,6 +102,8 @@ int DYNINSTmapUarea() printf("WARNING: program compiled for wrong version of SPARC 
chip.\n"); printf(" using getrusage for times, this may slow your program down\n"); printf(" by a factor of ten or more.\n"); + printf("\n"); + fflush(stdout); return(0); } @@ -102,3 +123,289 @@ int DYNINSTmapUarea() return(1); } + +/* + * Run a nop loop to estimate clock frequency. + * + */ + +#define LOOP_LIMIT 50000 +#define MILLION 1000000 + +float DYNINSTgetClock() +{ + + int i; + float elapsed; + float clockSpeed; + time64 startF, endF; + + startF = DYNINSTgetCPUtime(); + for (i=0; i < LOOP_LIMIT; i++) { + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); 
+ asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + 
asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + asm("nop"); + } + endF = DYNINSTgetCPUtime(); + + elapsed = (endF-startF)/((double) MILLION); + clockSpeed = (256*LOOP_LIMIT)/elapsed/MILLION; + + printf("elapsed = %f\n", elapsed); + printf("clockSpeed = %f\n", clockSpeed); + + return(clockSpeed); +} diff --git a/rtinst/src/RTunix.c b/rtinst/src/RTunix.c index 119dded..e6d8f6e 100644 --- a/rtinst/src/RTunix.c +++ b/rtinst/src/RTunix.c @@ -3,7 +3,10 @@ * functions for a processor running UNIX. * * $Log: RTunix.c,v $ - * Revision 1.9 1994/05/18 00:53:28 hollings + * Revision 1.10 1994/07/05 03:25:11 hollings + * obsereved cost model. + * + * Revision 1.9 1994/05/18 00:53:28 hollings * added flush's after error printfs to force data out pipes on the way * to paradyn. * @@ -51,7 +54,16 @@ #define MILLION 1000000 extern int DYNINSTmappedUarea; -extern int *_p_1, *_p_2; +extern float DYNINSTcyclesToUsec; +time64 DYNINSTtotalSampleTime; +int64 DYNINSTgetObservedCycles(); + +/* clockWord must be volatile because it changes on clock interrupts. + * -- added volatile jkh 7/3/94 + * + */ +typedef volatile unsigned int clockWord; +extern clockWord *_p_1, *_p_2; /* * Missing stuff. @@ -59,11 +71,27 @@ extern int *_p_1, *_p_2; */ extern int getrusage(int who, struct rusage *rusage); +/* + * return cpuTime in useconds.
+ * + */ +time64 DYNINSTgetCPUtime() +{ + time64 now; + struct rusage ru; + + getrusage(RUSAGE_SELF, &ru); + now = ru.ru_utime.tv_sec; + now *= MILLION; + now += ru.ru_utime.tv_usec; + + return(now); +} + inline time64 DYNINSTgetUserTime() { int first; time64 now; - struct rusage ru; if (DYNINSTmappedUarea) { retry: @@ -73,10 +101,7 @@ retry: now += *_p_2; if (*_p_1 != first) goto retry; } else { - getrusage(RUSAGE_SELF, &ru); - now = ru.ru_utime.tv_sec; - now *= MILLION; - now += ru.ru_utime.tv_usec; + now = DYNINSTgetCPUtime(); } return(now); } @@ -214,7 +239,8 @@ void DYNINSTinit(int skipBreakpoint) char *interval; struct sigvec alarmVector; struct sigvec pauseVector; - extern void DYNINSTsampleValues(); + extern float DYNINSTgetClock(); + extern void DYNINSTalarmExpire(); startWall = 0; @@ -234,7 +260,7 @@ void DYNINSTinit(int skipBreakpoint) * This prevents race conditions where signal handlers cause timers to * be started and stopped. */ - alarmVector.sv_handler = DYNINSTsampleValues; + alarmVector.sv_handler = DYNINSTalarmExpire; alarmVector.sv_mask = ~0; alarmVector.sv_flags = 0; @@ -249,11 +275,13 @@ void DYNINSTinit(int skipBreakpoint) ualarm(val, val); DYNINSTmappedUarea = DYNINSTmapUarea(); + DYNINSTcyclesToUsec = 1.0/DYNINSTgetClock(); /* * pause the process and wait for additional info. 
* */ + printf("Time at main %f\n", ((float) DYNINSTgetCPUtime())/1000000.0); if (!skipBreakpoint) DYNINSTbreakPoint(); } @@ -270,7 +298,6 @@ void DYNINSTgenerateTraceRecord(traceStream sid, short type, short length, { int ret; int count; - struct rusage ru; struct timeval tv; char buffer[1024], *bufptr; traceHeader header; @@ -291,10 +318,7 @@ void DYNINSTgenerateTraceRecord(traceStream sid, short type, short length, header.process += *_p_2; } else { #endif - getrusage(RUSAGE_SELF, &ru); - header.process = ru.ru_utime.tv_sec; - header.process *= (time64) MILLION; - header.process += ru.ru_utime.tv_usec; + header.process = DYNINSTgetCPUtime(); #ifdef notdef } #endif @@ -348,7 +372,6 @@ void DYNINSTreportTimer(tTimer *timer) { time64 now; time64 total; - struct rusage ru; struct timeval tv; traceSample sample; @@ -357,10 +380,7 @@ void DYNINSTreportTimer(tTimer *timer) } else if (timer->counter) { /* timer is running */ if (timer->type == processTime) { - getrusage(RUSAGE_SELF, &ru); - now = ru.ru_utime.tv_sec; - now *= (time64) MILLION; - now += ru.ru_utime.tv_usec; + now = DYNINSTgetCPUtime(); } else { gettimeofday(&tv, NULL); now = tv.tv_sec; @@ -404,3 +424,38 @@ void DYNINSTfork(void *arg, int pid) sigpause(); } } + + +extern int DYNINSTnumReported; +extern int DYNINSTtotalAlaramExpires; + +void DYNINSTprintCost() +{ + time64 now; + int64 value; + FILE *fp; + + value = DYNINSTgetObservedCycles(); + printf("Raw cycle count = %f\n", (double) value); + + value *= DYNINSTcyclesToUsec; + + fp = fopen("stats.out", "w"); + + fprintf(fp, "DYNINSTtotalAlaramExpires %d\n", DYNINSTtotalAlaramExpires); + fprintf(fp, "DYNINSTnumReported %d\n", DYNINSTnumReported); + + fprintf(fp,"Total instrumentation cost = %f\n", ((double) value)/1000000.0); + fprintf(fp,"Total handler cost = %f\n", + ((double) DYNINSTtotalSampleTime)/1000000.0); + + now = DYNINSTgetCPUtime(); + + fprintf(fp,"Total cpu time of program %f\n", ((double) now)/MILLION); + fflush(fp); + fclose(fp); + + /* record 
that we are done -- should be somewhere better. */ + DYNINSTgenerateTraceRecord(0, TR_EXIT, 0, NULL); +} + -- 1.8.3.1