2 * Copyright (c) 1996 Barton P. Miller
4 * We provide the Paradyn Parallel Performance Tools (below
5 * described as Paradyn") on an AS IS basis, and do not warrant its
6 * validity or performance. We reserve the right to update, modify,
7 * or discontinue this software at any time. We shall have no
8 * obligation to supply such updates or modifications or any other
9 * form of support to you.
11 * This license is for research uses. For such uses, there is no
12 * charge. We define "research use" to mean you may freely use it
13 * inside your organization for whatever purposes you see fit. But you
14 * may not re-distribute Paradyn or parts of Paradyn, in any form
15 * source or binary (including derivatives), electronic or otherwise,
16 * to any other organization or entity without our permission.
18 * (for other uses, please contact us at paradyn@cs.wisc.edu)
20 * All warranties, including without limitation, any warranty of
21 * merchantability or fitness for a particular purpose, are hereby
24 * By your use of Paradyn, you understand and agree that we (or any
25 * other person or entity with proprietary rights in Paradyn) are
26 * under no obligation to provide either maintenance services,
27 * update services, notices of latent defects, or correction of
28 * defects for Paradyn.
30 * Even if advised of the possibility of such damages, under no
31 * circumstances shall we (or any other person or entity with
32 * proprietary rights in the software licensed hereunder) be liable
33 * to you or any third party for direct, indirect, or consequential
34 * damages of any character regardless of type of action, including,
35 * without limitation, loss of profits, loss of use, loss of good
36 * will, or computer failure or malfunction. You agree to indemnify
37 * us (and any other person or entity with proprietary rights in the
38 * software licensed hereunder) for any and all liability it may
39 * incur to third parties resulting from your use of Paradyn.
42 /************************************************************************
43 * RTsolaris.c: clock access functions for solaris-2.
44 ************************************************************************/
#include <sys/ucontext.h>
#include <sys/procfs.h> /* /proc PIOCUSAGE */
#include <fcntl.h> /* O_RDONLY */
#include <stdlib.h> /* abort() */
#include <unistd.h> /* getpid() */

#include "rtinst/h/rtinst.h"

/*extern int gettimeofday(struct timeval *, struct timezone *);*/
extern void perror(const char *);
63 /************************************************************************
65 ************************************************************************/
/* Unit-conversion factors, kept as doubles.
 * NOTE(review): neither constant is referenced in the visible part of this
 * file; the integer helpers div1000()/mulMillion() are used instead. */
static const double NANO_PER_USEC = 1.0e3; /* nanoseconds in one microsecond */
static const double MILLION = 1.0e6; /* 10^6, e.g. microseconds in one second */
/************************************************************************
 * void DYNINSTos_init(int calledByFork, int calledByAttach)
 *
 * os initialization function.  On x86 Solaris (i386_unknown_solaris2_5)
 * it installs DYNINSTtrapHandler as the SIGTRAP handler.
************************************************************************/
/*
 * OS-specific initialization of the runtime library.
 *
 * calledByFork / calledByAttach are accepted for interface compatibility;
 * nothing in this function reads them.
 *
 * On x86 Solaris this installs DYNINSTtrapHandler for SIGTRAP with every
 * signal blocked while the handler runs.
 */
void
DYNINSTos_init(int calledByFork, int calledByAttach) {
    (void)calledByFork;
    (void)calledByAttach;

    /*
       Install trap handler.
       This is currently being used only on the x86 platform.
    */
#ifdef i386_unknown_solaris2_5
    {
        void DYNINSTtrapHandler(int sig, siginfo_t *info, ucontext_t *uap);
        struct sigaction act;

        act.sa_handler = DYNINSTtrapHandler;
        /* The flags must be initialised before sigaction() reads them.
         * NOTE(review): the handler dereferences its ucontext argument;
         * POSIX only guarantees that argument with SA_SIGINFO and
         * sa_sigaction -- confirm which flags this platform needs. */
        act.sa_flags = 0;
        sigfillset(&act.sa_mask);
        if (sigaction(SIGTRAP, &act, 0) != 0) {
            perror("sigaction(SIGTRAP)");
            /* NOTE(review): failure handling after perror() was not visible
             * in the source; without the handler a trap would kill the
             * process anyway, so stop here. */
            abort();
        }
    }
#endif
}
/************************************************************************
 * time64 DYNINSTgetCPUtime(void)
 *
 * get the total CPU time used for "an" LWP of the monitored process.
 * this function needs to be rewritten if a per-thread CPU time is
 * required.  time for a specific LWP can be obtained via the "/proc"
 * filesystem.
 * return value is in usec units.
 *
 * XXXX - This should really return time in native units and use normalize.
 *	conversion to float and division are way too expensive to
 *	do every time we want to read a clock (slows this down 2x).
************************************************************************/
static unsigned long long div1000(unsigned long long in) {
   /* Approximates in/1000 without an integer division instruction or library
    * call, both of which are slow.
    * We do only shifts, adds, and subtracts.
    *
    * We divide by 1000 in this way:
    * multiply by 1/1000, or multiply by (1/1000)*2^30 and then right-shift by 30.
    * So what is 1/1000 * 2^30?
    * It is 1,073,742.  (actually this is rounded up from 1,073,741.824)
    * So we can multiply by 1,073,742 and then right-shift by 30.
    *
    * Now for multiplying by 1,073,742...
    * 1,073,742 = (1,048,576 + 16384 + 8192 + 512 + 64 + 8 + 4 + 2)
    * or, slightly optimized:
    *           = (1,048,576 + 16384 + 8192 + 512 + 64 + 16 - 2)
    * for a total of 8 shifts and 6 add/subs, or 14 operations.
    *
    * Because the multiplier is rounded up, the result can be slightly larger
    * than the exact quotient for large inputs.
    */

   unsigned long long temp = in << 20; /* multiply by 1,048,576 */
      /* beware of overflow; left shift by 20 is quite a lot.
       * If you know that the input fits in 32 bits (4 billion) then
       * no problem.  But if it's much bigger then start worrying... */

   temp += in << 14; /* 16384 */
   temp += in << 13; /* 8192 */
   temp += in << 9;  /* 512 */
   temp += in << 6;  /* 64 */
   temp += in << 4;  /* 16 */
   temp -= in << 1;  /* 2 (a right shift by 2 here would subtract in/4, not 2*in) */

   return (temp >> 30); /* divide by 2^30 */
}
static unsigned long long divMillion(unsigned long long in) {
   /* Approximates in/1,000,000 without an integer division instruction or
    * library call, both of which are slow.
    * We do only shifts, adds, and subtracts.
    *
    * We divide by 1,000,000 in this way:
    * multiply by 1/1,000,000, or multiply by (1/1,000,000)*2^30 and then
    * right-shift by 30.  So what is 1/1,000,000 * 2^30?
    * It is 1,074.  (actually this is rounded up from 1,073.74)
    * So we can multiply by 1,074 and then right-shift by 30.
    *
    * Now for multiplying by 1,074
    * 1,074 = (1024 + 32 + 16 + 2)
    * for a total of 4 left shifts and 3 adds, plus the final right shift.
    *
    * Note: compare with div1000 -- it's cheaper to divide by a million than
    * by a thousand.
    *
    * Because 1,074 overshoots the true factor by about 0.024%, the result is
    * an estimate, not an exact quotient (e.g. 999,999 yields 1).
    */

   unsigned long long temp = in << 10; /* multiply by 1024 */
      /* beware of overflow...if the input arg uses more than 52 bits
       * then start worrying about whether (in << 10) plus the smaller
       * additions we're gonna do next will fit in 64... */

   temp += in << 5; /* 32 */
   temp += in << 4; /* 16 */
   temp += in << 1; /* 2 */

   return (temp >> 30); /* divide by 2^30 */
}
/* Returns in * 1,000,000 using only shifts and subtracts
 * (1,000,000 = 125 * 125 * 64).  The result wraps modulo 2^64 if the
 * product does not fit in an unsigned long long. */
static unsigned long long mulMillion(unsigned long long in) {
   unsigned long long result = in;

   /* multiply by 125 by multiplying by 128 and subtracting 3x */
   result = (result << 7) - result - result - result;

   /* multiply by 125 again, for a total of 15625x */
   result = (result << 7) - result - result - result;

   /* multiply by 64, for a total of 1,000,000x */
   result <<= 6;

   /* cost was: 3 shifts and 6 subtracts
    * cost of calling mul1000(mul1000()) would be: 6 shifts and 4 subtracts
    *
    * Another algorithm is to multiply by 2^6 and then 5^6.
    * The former is super-cheap (one shift); the latter is more expensive.
    * 5^6 = 15625 = 16384 - 512 - 256 + 8 + 1
    * so multiplying by 5^6 means 4 shift operations and 4 add/sub ops
    * so multiplying by 1000000 means 5 shift operations and 4 add/sub ops.
    * That may or may not be cheaper than what we're doing (3 shifts; 6 subtracts);
    * I'm not sure.  --ari
    */

   return result;
}
static int firstTime = 1; /* boolean: nonzero until the /proc descriptor has been opened */
static int procfd = -1;   /* descriptor for /proc/<pid>; -1 while not open */

/*
 * Opens /proc/<pid> for this process read-only and stores the descriptor in
 * procfd, for later PIOCUSAGE queries.
 */
void DYNINSTgetCPUtimeInitialize(void) {
   /* This stuff is done just once */
   char str[32]; /* "/proc/" + any int pid + NUL fits comfortably */

   sprintf(str, "/proc/%d", (int)getpid());
   procfd = open(str, O_RDONLY);
   if (procfd < 0) {
      fprintf(stderr, "open of /proc failed in DYNINSTgetCPUtimeInitialize\n");
      /* NOTE(review): what followed the message was not visible in the
       * source; CPU time cannot be read without the descriptor, so stop. */
      abort();
   }
}
232 DYNINSTgetCPUtime(void) {
233 static time64 previous=0;
236 /* gethrvtime()/1000 doesn't work right any more with shm sampling because it
237 * returns values that are out of sync with /proc's PIOCUSAGE, so when a fudge
238 * factor needs to be added by paradynd's shm sampling of an active timer,
239 * things don't work. getrusage() does seem to work okay, but we'd like to not use
240 * getrusage() because it's obsolete in solaris and slower; so we use /proc PIOCUSAGE...
242 * This is too bad; we'd prefer the (presumably fast) gethrvtime(). But, again,
243 * it simply won't work with shm sampling. If you are thinking of changing things
244 * back to gethrvtime(), please check with me first. --ari
246 * Some day........in an ideal world, we'll use the %TICK register......
249 /* time64 now = (time64)gethrvtime()/(time64)1000; */
251 struct prusage theUsage; /* for /proc PIOCUSAGE call */
255 DYNINSTgetCPUtimeInitialize();
259 if (ioctl(procfd, PIOCUSAGE, &theUsage) < 0) {
260 perror("rtinst get-cpu-time PIOCUSAGE");
264 now = theUsage.pr_utime.tv_sec + theUsage.pr_stime.tv_sec;
266 now = mulMillion(now); /* sec to usec */
267 /* now *= 1000000; */
269 now += div1000(theUsage.pr_utime.tv_nsec + theUsage.pr_stime.tv_nsec);
270 /* now += (theUsage.pr_utime.tv_nsec + theUsage.pr_stime.tv_nsec) / 1000; */
273 /* I don't think that this ever happens for solaris, thankfully */
285 /************************************************************************
286 * time64 DYNINSTgetWalltime(void)
288 * get the total walltime used by the monitored process.
289 * return value is in usec units.
290 ************************************************************************/
293 DYNINSTgetWalltime(void) {
294 static time64 previous=0;
299 if (gettimeofday(&tv,NULL) == -1) {
300 perror("gettimeofday");
304 now = mulMillion(tv.tv_sec) + tv.tv_usec;
305 // now = (time64)tv.tv_sec*(time64)1000000 + (time64)tv.tv_usec;
307 if (now < previous) continue;
/****************************************************************************
   The trap handler. Currently being used only on x86 platform.

   Traps are used when we can't insert a jump at a point. The trap
   handler looks up the address of the base tramp for the point that
   uses the trap, and sets the pc to this base tramp.
   Paradynd is responsible for updating the tramp table when it
   inserts instrumentation.
*****************************************************************************/
#ifdef i386_unknown_solaris2_5
/* Hash table mapping the address of a trap instruction (key) to the address
 * of its base tramp (val).  Probed by lookup() below. */
trampTableEntry DYNINSTtrampTable[TRAMPTABLESZ];
unsigned DYNINSTtotalTraps = 0;

/*
 * Looks up key in DYNINSTtrampTable: starts at HASH1(key) and advances by
 * HASH2(key) per probe.  Returns the stored val, or 0 if an empty slot is
 * reached first.
 * NOTE(review): the slot tests were not visible in the source; key 0 is
 * assumed to mark an empty slot -- confirm against the code that fills the
 * table.
 */
static unsigned lookup(unsigned key) {
    unsigned u;
    unsigned k;

    /* Reduce the whole index modulo the table size on every step.  Reducing
     * only the step (u += HASH2(key) % TRAMPTABLESZ) lets u run past the end
     * of the table. */
    for (u = HASH1(key); 1; u = (u + HASH2(key)) % TRAMPTABLESZ) {
        k = DYNINSTtrampTable[u].key;
        if (k == 0)
            return 0;
        else if (k == key)
            return DYNINSTtrampTable[u].val;
    }
    /* not reached */
}

/*
 * SIGTRAP handler: redirects execution from the trapping pc to the base
 * tramp registered for it in DYNINSTtrampTable.
 */
void DYNINSTtrapHandler(int sig, siginfo_t *info, ucontext_t *uap) {
    unsigned pc = uap->uc_mcontext.gregs[PC];
    unsigned nextpc = lookup(pc);

    (void)sig;
    (void)info;

    if (nextpc == 0) {
        /* No tramp registered for this pc: resuming at address 0 would only
         * crash later and less legibly, so stop here. */
        abort();
    }

    uap->uc_mcontext.gregs[PC] = nextpc;

    /* NOTE(review): the increment was not visible in the source; it is
     * inferred from the counter's name -- confirm. */
    DYNINSTtotalTraps++;
}
#endif