2 * This file contains the implementation of runtime dynamic instrumentation
3 * functions for a processor running UNIX.
6 * Revision 1.22 1995/02/16 09:07:33 markc
7 * Made Boolean type RT_Boolean to prevent picking up a different boolean
10 * Revision 1.21 1994/11/11 10:39:14 markc
11 * Commented out non-emergency printfs
13 * Revision 1.20 1994/10/09 21:26:05 hollings
14 * Fixed cycles to time conversion.
16 * Revision 1.19 1994/09/20 18:26:51 hollings
17 * added DYNINSTcyclesPerSecond to get cost values no matter the clock speed.
19 * removed a race condition in DYNINSTcyclesPerSecond.
21 * Revision 1.18 1994/08/22 16:05:37 markc
22 * Removed lastValue array.
23 * Added lastValue variable to timer structure.
24 * Added error messages for timer regression.
25 * Removed lastValue array.
26 * Added lastValue variable to timer structure.
27 * Added error messages for timer regression.
29 * Revision 1.17 1994/08/17 17:16:45 markc
30 * Increased the size of the lastValue and lastTime arrays. lastValue is
31 * referenced by the timer id, which can be greater than 200. There should
32 * be some way of enforcing a limit between paradynd and rtinst.
34 * Revision 1.16 1994/08/02 18:18:57 hollings
35 * added code to save/restore FP state on entry/exit to signal handle
36 * (really jcargill, but commited by hollings).
38 * changed comparisons on time regression to use 64 bit int compares rather
39 * than floats to prevent fp rounding error from causing false alarms.
41 * Revision 1.15 1994/07/26 20:04:48 hollings
42 * removed slots used variables.
44 * Revision 1.14 1994/07/22 19:24:54 hollings
45 * added actual paused time for CM-5.
47 * Revision 1.13 1994/07/16 03:39:31 hollings
48 * stopped using assembly version of clocks (temporary).
50 * Revision 1.12 1994/07/14 23:35:36 hollings
51 * added return of cost model record.
53 * Revision 1.11 1994/07/11 22:47:51 jcargill
54 * Major CM5 commit: include syntax changes, some timer changes, removal
55 * of old aggregation code, old pause code, added signal-driven sampling
56 * within node processes
58 * Revision 1.10 1994/07/05 03:25:11 hollings
59 * obsereved cost model.
61 * Revision 1.9 1994/05/18 00:53:28 hollings
62 * added flush's after error printfs to force data out pipes on the way
65 * Revision 1.8 1994/04/07 01:21:30 markc
66 * Cleaned up writes. Writes interrupted by system calls get retried, others
69 * Revision 1.7 1994/03/25 16:03:11 markc
70 * Added retry to write which could be interrupted by a signal.
72 * Revision 1.6 1994/02/16 00:07:24 hollings
73 * Added a default sampling interval of 500msec. Previous default was not
74 * to collect any data.
76 * Revision 1.5 1994/02/02 00:46:14 hollings
77 * Changes to make it compile with the new tree.
79 * Revision 1.4 1993/12/13 19:48:12 hollings
80 * force records to be word aligned.
82 * Revision 1.3 1993/10/19 15:29:58 hollings
83 * new simpler primitives.
85 * Revision 1.2 1993/08/26 19:43:17 hollings
86 * added uarea mapping code.
88 * Revision 1.1 1993/07/02 21:49:35 hollings
96 #include <sys/resource.h>
97 #include <sys/param.h>
106 #include "rtinst/h/rtinst.h"
107 #include "rtinst/h/trace.h"
109 #define MILLION 1000000
110 extern int DYNINSTmappedUarea;
111 extern float DYNINSTcyclesToUsec;
112 extern int DYNINSTnumReported;
113 extern int DYNINSTtotalAlaramExpires;
114 extern int DYNINSTtotalSamples;
115 extern float DYNINSTsamplingRate;
116 extern time64 DYNINSTtotalSampleTime;
117 extern time64 DYNINSTlastCPUTime;
118 extern time64 DYNINSTlastWallTime;
121 FILE *DYNINSTtraceFp;
122 tTimer DYNINSTelapsedTime;
123 tTimer DYNINSTelapsedCPUTime;
124 int64 DYNINSTgetObservedCycles(RT_Boolean);
125 time64 DYNINSTtotalSampleTime;
126 int64 DYNINSTgetObservedCycles();
128 /* clockWord must be volatile becuase it changes on clock interrups.
129 * -- added volatile jkh 7/3/94
132 typedef volatile unsigned int clockWord;
133 extern clockWord *_p_1, *_p_2;
139 extern int getrusage(int who, struct rusage *rusage);
142 * return cpuTime in useconds.
145 time64 DYNINSTgetCPUtime()
150 getrusage(RUSAGE_SELF, &ru);
151 now = ru.ru_utime.tv_sec;
153 now += ru.ru_utime.tv_usec;
158 time64 DYNINSTgetWallTime()
163 gettimeofday(&tv, NULL);
165 now *= (time64) MILLION;
171 inline time64 DYNINSTgetUserTime()
176 if (DYNINSTmappedUarea) {
182 if (*_p_1 != first) goto retry;
184 now = DYNINSTgetCPUtime();
190 void DYNINSTbreakPoint()
192 /* printf ("Process %d about to stop self...\n", getpid()); */
193 /* fflush (stdout); */
194 kill(getpid(), SIGSTOP);
195 /* printf ("Process %d has been restarted...\n", getpid()); */
196 /* fflush (stdout); */
199 void DYNINSTstartProcessTimer(tTimer *timer)
203 if (timer->counter == 0) {
204 temp = DYNINSTgetUserTime();
206 timer->normalize = MILLION;
208 /* this must be last to prevent race conditions */
212 void DYNINSTstopProcessTimer(tTimer *timer)
217 /* don't stop a counter that is not running */
218 if (!timer->counter) return;
221 /* Warning - there is a window between setting now, and mutex that
222 can cause time to go backwards by the time to execute the
223 instructions between these two points. This is not a cummlative error
224 and should not affect samples. This was done (rather than re-sampling
225 now because the cost of computing now is so high).
227 if (timer->counter == 1) {
228 now = DYNINSTgetUserTime();
229 timer->snapShot = now - timer->start + timer->total;
232 /* This next line can have the race condition. */
233 /* timer->total = timer->snapShot; */
234 timer->total = DYNINSTgetUserTime() - timer->start + timer->total;
236 if (now < timer->start) {
237 getrusage(RUSAGE_SELF, &ru);
238 printf("end before start\n");
248 void DYNINSTstartWallTimer(tTimer *timer)
252 if (timer->counter == 0) {
253 gettimeofday(&tv, NULL);
254 timer->start = tv.tv_sec;
255 timer->start *= (time64) MILLION;
256 timer->start += tv.tv_usec;
257 timer->normalize = MILLION;
259 /* this must be last to prevent race conditions */
263 void DYNINSTstopWallTimer(tTimer *timer)
268 /* don't stop a counter that is not running */
269 if (!timer->counter) return;
271 if (timer->counter == 1) {
272 gettimeofday(&tv, NULL);
276 /* see note before StopProcess time for warning about this mutex */
277 timer->snapShot = timer->total + now - timer->start;
280 timer->total = timer->snapShot;
289 volatile int DYNINSTpauseDone = 0;
292 * change the variable to let the process proceed.
295 void DYNINSTcontinueProcess()
297 DYNINSTpauseDone = 1;
306 * pause the process and let only USR2 signal handlers run until a SIGUSR1.
310 void DYNINSTpauseProcess()
315 sigs = ((1 << (SIGUSR2-1)) | (1 << (SIGUSR1-1)) | (1 << (SIGTSTP-1)));
317 DYNINSTpauseDone = 0;
318 while (!DYNINSTpauseDone) {
321 /* temporary busy wait until we figure out what the TSD is up to. */
322 printf("out of sigpuase\n");
328 void DYNINSTinit(int skipBreakpoint)
332 struct sigvec alarmVector;
333 extern int DYNINSTmapUarea();
334 extern float DYNINSTcyclesPerSecond();
335 extern void DYNINSTalarmExpire();
340 * clear global base tramp and cycle counters.
347 /* init these before the first alarm can expire */
348 DYNINSTcyclesToUsec = (1.0/DYNINSTcyclesPerSecond()) * 1000000;
349 DYNINSTlastCPUTime = DYNINSTgetCPUtime();
350 DYNINSTlastWallTime = DYNINSTgetWallTime();
352 /* define the alarm signal vector. We block all signals while sampling.
353 * This prevents race conditions where signal handlers cause timers to
354 * be started and stopped.
356 alarmVector.sv_handler = DYNINSTalarmExpire;
357 alarmVector.sv_mask = ~0;
358 alarmVector.sv_flags = 0;
360 sigvec(SIGALRM, &alarmVector, NULL);
362 /* All the signal-initiating stuff is now in the paradynd. */
364 /* default is 500msec */
366 interval = (char *) getenv("DYNINSTsampleInterval");
368 val = atoi(interval);
370 DYNINSTsamplingRate = val/1000000.0;
371 sigvec(SIGALRM, &alarmVector, NULL);
374 DYNINSTmappedUarea = DYNINSTmapUarea();
377 * pause the process and wait for additional info.
380 printf("Time at main %f\n", ((float) DYNINSTgetCPUtime())/1000000.0);
381 if (!skipBreakpoint) DYNINSTbreakPoint();
383 DYNINSTstartWallTimer(&DYNINSTelapsedTime);
384 DYNINSTstartProcessTimer(&DYNINSTelapsedCPUTime);
396 * generate a trace record onto the named stream.
399 void DYNINSTgenerateTraceRecord(traceStream sid, short type, short length,
400 void *eventData, int flush)
408 if (pipeGone) return;
410 gettimeofday(&tv, NULL);
411 header.wall = tv.tv_sec;
412 header.wall *= (time64) MILLION;
413 header.wall += tv.tv_usec;
414 header.wall -= startWall;
417 if (DYNINSTmappedUarea) {
418 header.process = *_p_1;
419 header.process *= MILLION;
420 header.process += *_p_2;
423 header.process = DYNINSTgetCPUtime();
428 /* round length off to a word aligned unit */
429 length = ALIGN_TO_WORDSIZE(length);
432 header.length = length;
434 memcpy(&buffer[count], &sid, sizeof(traceStream));
435 count += sizeof(traceStream);
437 memcpy(&buffer[count], &header, sizeof(header));
438 count += sizeof(header);
440 count = ALIGN_TO_WORDSIZE(count);
441 memcpy(&buffer[count], eventData, length);
444 /* on this platorm, we have a pipe to the controller process */
447 if (!DYNINSTtraceFp || (type == TR_EXIT)) {
448 DYNINSTtraceFp = fdopen(dup(CONTROLLER_FD), "w");
450 ret = fwrite(buffer, count, 1, DYNINSTtraceFp);
452 extern char *sys_errlist[];
453 printf("unable to write trace record %s\n", sys_errlist[errno]);
454 printf("disabling further data logging, pid=%d\n", getpid());
456 /* pipeGone = True; */
458 if (flush) DYNINSTflushTrace();
461 void DYNINSTflushTrace()
463 if (DYNINSTtraceFp) fflush(DYNINSTtraceFp);
466 /* time64 lastValue[10000]; */
467 double lastTime[200];
469 void DYNINSTreportTimer(tTimer *timer)
477 total = timer->snapShot;
478 } else if (timer->counter) {
479 /* timer is running */
480 if (timer->type == processTime) {
481 /* now = DYNINSTgetCPUtime(); */
482 /* use mapped time if available */
483 now = DYNINSTgetUserTime();
485 gettimeofday(&tv, NULL);
490 total = now - timer->start + timer->total;
492 total = timer->total;
494 if (total < timer->lastValue) {
495 if (timer->type == processTime) {
500 printf("time regressed timer %d, total = %f, last = %f\n",
501 timer->id.id, (float) total, (float) timer->lastValue);
502 if (timer->counter) {
503 printf("timer was active\n");
505 printf("timer was inactive\n");
507 printf("mutex=%d, counter=%d, sampled=%d, snapShot=%f\n\n",
508 (int)timer->mutex, (int)timer->counter, (int)timer->sampled,
509 (double) timer->snapShot);
510 printf("now = %f\n start = %f\n total = %f\n",
511 (double) now, (double) timer->start, (double) timer->total);
515 timer->lastValue = total;
517 sample.id = timer->id;
518 sample.value = ((double) total) / (double) timer->normalize;
519 DYNINSTtotalSamples++;
521 DYNINSTgenerateTraceRecord(0, TR_SAMPLE, sizeof(sample), &sample, 0);
522 /* printf("raw sample %d = %f\n", sample.id.id, sample.value); */
525 void DYNINSTfork(void *arg, int pid)
530 printf("fork called with pid = %d\n", pid);
533 forkRec.ppid = getpid();
537 DYNINSTgenerateTraceRecord(sid,TR_FORK,sizeof(forkRec), &forkRec, 1);
539 /* set up signals and stop at a break point */
546 void DYNINSTprintCost()
552 struct endStatsRec stats;
554 DYNINSTstopProcessTimer(&DYNINSTelapsedCPUTime);
555 DYNINSTstopWallTimer(&DYNINSTelapsedTime);
557 value = DYNINSTgetObservedCycles(0);
558 stats.instCycles = value;
560 value *= DYNINSTcyclesToUsec;
562 stats.alarms = DYNINSTtotalAlaramExpires;
563 stats.numReported = DYNINSTnumReported;
564 stats.instTime = ((double) value)/1000000.0;
565 stats.handlerCost = ((double) DYNINSTtotalSampleTime)/1000000.0;
567 now = DYNINSTgetCPUtime();
568 stats.totalCpuTime = ((double) DYNINSTelapsedCPUTime.total)/MILLION;
569 stats.totalWallTime = ((double) DYNINSTelapsedTime.total/MILLION);
571 stats.samplesReported = DYNINSTtotalSamples;
572 stats.samplingRate = DYNINSTsamplingRate;
577 fp = fopen("stats.out", "w");
580 if (DYNINSTprofile) {
585 extern void DYNINSTreportCounter();
588 limit = DYNINSTbufsiz/sizeof(short);
589 /* should really be _DYNINSTendUserCode */
590 startInst = (int) &DYNINSTreportCounter;
591 fprintf(fp, "startInst = %x\n", startInst);
592 for (i=0; i < limit; i++) {
593 if (DYNINSTprofBuffer[i])
594 fprintf(fp, "%x %d\n", i*2, DYNINSTprofBuffer[i]);
595 if (i * 2 > startInst) {
596 stats.instTicks += DYNINSTprofBuffer[i];
598 stats.userTicks += DYNINSTprofBuffer[i];
602 /* fwrite(DYNINSTprofBuffer, DYNINSTbufsiz, 1, fp); */
603 fprintf(fp, "stats.instTicks %d\n", stats.instTicks);
604 fprintf(fp, "stats.userTicks %d\n", stats.userTicks);
608 getrusage(RUSAGE_SELF, &ru);
610 fprintf(fp, "DYNINSTtotalAlaramExpires %d\n", stats.alarms);
611 fprintf(fp, "DYNINSTnumReported %d\n", stats.numReported);
612 fprintf(fp,"Raw cycle count = %f\n", (double) stats.instCycles);
613 fprintf(fp,"Total instrumentation cost = %f\n", stats.instTime);
614 fprintf(fp,"Total handler cost = %f\n", stats.handlerCost);
615 fprintf(fp,"Total cpu time of program %f\n", stats.totalCpuTime);
616 fprintf(fp,"Total system time of program %f\n",
617 ru.ru_stime.tv_sec * 1000000.0 + ru.ru_stime.tv_usec);
618 fprintf(fp,"Elapsed wall time of program %f\n",
619 stats.totalWallTime/1000000.0);
620 fprintf(fp,"total data samples %d\n", stats.samplesReported);
621 fprintf(fp,"sampling rate %f\n", stats.samplingRate);
622 fprintf(fp,"Application program ticks %d\n", stats.userTicks);
623 fprintf(fp,"Instrumentation ticks %d\n", stats.instTicks);
627 /* record that we are done -- should be somewhere better. */
628 DYNINSTgenerateTraceRecord(0, TR_EXIT, sizeof(stats), &stats, 1);
633 * These are null routines for Unix: signal handler semantics
634 * guarantee that the FPU state is saved/restored. Not so on CM5 nodes!
637 void saveFPUstate(float *base)
642 void restoreFPUstate(float *base)
648 * If we can't read it, try to guess it.
650 static float guessClock()
655 #define PATTERN "\tclock-frequency:"
657 * find the number of cycles per second on this machine.
659 float DYNINSTcyclesPerSecond()
666 fp = popen("/usr/etc/devinfo -vp", "r");
668 /* can't run command so guess the clock rate. */
670 while (fgets(line, sizeof(line), fp)) {
671 if (!strncmp(PATTERN, line, strlen(PATTERN))) {
672 ptr = strchr(line, ' ');
673 if (!ptr) return(guessClock());
674 speed = strtol(ptr, NULL, 16);
680 /* printf(line, "Clock = %d\n", speed); */