2 * Copyright (c) 1996 Barton P. Miller
4 * We provide the Paradyn Parallel Performance Tools (below
5 * described as Paradyn") on an AS IS basis, and do not warrant its
6 * validity or performance. We reserve the right to update, modify,
7 * or discontinue this software at any time. We shall have no
8 * obligation to supply such updates or modifications or any other
9 * form of support to you.
11 * This license is for research uses. For such uses, there is no
12 * charge. We define "research use" to mean you may freely use it
13 * inside your organization for whatever purposes you see fit. But you
14 * may not re-distribute Paradyn or parts of Paradyn, in any form
15 * source or binary (including derivatives), electronic or otherwise,
16 * to any other organization or entity without our permission.
18 * (for other uses, please contact us at paradyn@cs.wisc.edu)
20 * All warranties, including without limitation, any warranty of
21 * merchantability or fitness for a particular purpose, are hereby
24 * By your use of Paradyn, you understand and agree that we (or any
25 * other person or entity with proprietary rights in Paradyn) are
26 * under no obligation to provide either maintenance services,
27 * update services, notices of latent defects, or correction of
28 * defects for Paradyn.
30 * Even if advised of the possibility of such damages, under no
31 * circumstances shall we (or any other person or entity with
32 * proprietary rights in the software licensed hereunder) be liable
33 * to you or any third party for direct, indirect, or consequential
34 * damages of any character regardless of type of action, including,
35 * without limitation, loss of profits, loss of use, loss of good
36 * will, or computer failure or malfunction. You agree to indemnify
37 * us (and any other person or entity with proprietary rights in the
38 * software licensed hereunder) for any and all liability it may
39 * incur to third parties resulting from your use of Paradyn.
42 #include "util/h/headers.h"
46 #include "rtinst/h/rtinst.h"
47 #include "rtinst/h/trace.h"
48 #include "util/h/aggregateSample.h"
49 #include "dyninstAPI/src/symtab.h"
50 #include "dyninstAPI/src/pdThread.h"
51 #include "dyninstAPI/src/process.h"
52 #include "dyninstAPI/src/inst.h"
53 #include "dyninstAPI/src/instP.h"
54 #include "dyninstAPI/src/dyninstP.h"
55 #include "dyninstAPI/src/ast.h"
56 #include "dyninstAPI/src/util.h"
57 #include "paradynd/src/comm.h"
58 #include "paradynd/src/internalMetrics.h"
59 #include "paradynd/src/init.h"
60 #include "paradynd/src/perfStream.h"
61 #include "paradynd/src/main.h"
62 #include "dyninstAPI/src/stats.h"
63 #include "paradynd/src/dynrpc.h"
64 #include "paradynd/src/mdld.h"
65 #include "util/h/Timer.h"
66 #include "paradynd/src/showerror.h"
67 #include "paradynd/src/costmetrics.h"
68 #include "paradynd/src/metric.h"
69 #include "util/h/debugOstream.h"
71 // The following variables were defined in process.C:
72 extern debug_ostream attach_cerr;
73 extern debug_ostream inferiorrpc_cerr;
74 extern debug_ostream shmsample_cerr;
75 extern debug_ostream forkexec_cerr;
76 extern debug_ostream metric_cerr;
// Declared extern; their definitions are not visible in this excerpt.
78 extern unsigned inferiorMemAvailable;
79 extern vector<unsigned> getAllTrampsAtPoint(instInstance *instance);
// Assigned from metricDefinitionNode::counterId by the add*Counter/add*Timer
// routines and by forkProcess() below.
80 static unsigned internalMetricCounterId = 0;
// Zero-initialized tallies; no visible code in this excerpt updates them.
82 static unsigned numOfActCounters_all=0;
83 static unsigned numOfActProcTimers_all=0;
84 static unsigned numOfActWallTimers_all=0;
// Forward declarations of the sample-batching routines used by
// endOfDataCollection().
86 void flush_batch_buffer();
87 void batchSampleData(int mid, double startTimeStamp, double endTimeStamp,
88 double value, unsigned val_weight, bool internal_metric);
// Starts at zero; propagateToNewProcess() increases it when a propagated mi
// costs more than the aggregate's recorded originalCost_.
90 double currentPredictedCost = 0.0;
// forkProcess() adds one entry per duplicated dataReqNode, keyed by the newly
// allocated counter id.
92 dictionary_hash <unsigned, metricDefinitionNode*> midToMiMap(uiHash);
93 // maps low-level counter-ids to metricDefinitionNodes
// Hash function for metricDefinitionNode pointers: converts the pointer value
// to unsigned and discards the two low-order bits.
// NOTE(review): the cast to unsigned drops high bits wherever pointers are
// wider than unsigned -- confirm the intended target platforms.
95 unsigned mdnHash(const metricDefinitionNode *&mdn) {
96 return ((unsigned)mdn) >> 2; // assume all addrs are 4-byte aligned
97 // return ((unsigned) mdn);
// Hash function for component mi pointers: hashes the node's full name
// (ptr->getFullName()) with string::hash. ptr is dereferenced, so it must not
// be null.
100 unsigned componentMdnPtrHash(metricDefinitionNode * const &ptr) {
101 // maybe assert that "ptr" isn't for an aggregate mi
102 return string::hash(ptr->getFullName());
// Dictionary of metric instances keyed by an unsigned id; createMetricInstance()
// iterates over it to find an existing mi with a matching flat name.
106 dictionary_hash<unsigned, metricDefinitionNode*> allMIs(uiHash);
// Dictionary of component mi's keyed by flat name; updated by handleExec(),
// removeThisInstance() and forkProcess() below.
107 dictionary_hash<string, metricDefinitionNode*> allMIComponents(string::hash);
// Definition of the static member list that doInternalMetric() and
// mdl_internal_metric_data() search by metric name.
108 vector<internalMetric*> internalMetric::allInternalMetrics;
110 // used to indicate the mi is no longer used.
// Divisor applied to firstRecordTime in the endOfDataCollection() assertions.
112 #define MILLION 1000000.0
// Fills `result` with the aggregation operator and style of the metric named
// `metric_name`. The internal metrics are scanned first, then the cost
// metrics; the last visible statement delegates to mdl_metric_data().
// NOTE(review): whatever follows each successful match (presumably an early
// return) is not visible in this excerpt -- confirm against the full file.
114 bool mdl_internal_metric_data(const string& metric_name, mdl_inst_data& result) {
115 unsigned size = internalMetric::allInternalMetrics.size();
116 for (unsigned u=0; u<size; u++) {
117 internalMetric *theIMetric = internalMetric::allInternalMetrics[u];
118 if (theIMetric->name() == metric_name) {
119 result.aggregate = theIMetric->aggregate();
120 result.style = theIMetric->style();
// Second pass: the cost metrics, matched by name in the same way.
125 for (unsigned u2=0; u2< costMetric::allCostMetrics.size(); u2++) {
126 if (costMetric::allCostMetrics[u2]->name() == metric_name) {
127 result.aggregate = costMetric::allCostMetrics[u2]->aggregate();
128 result.style = costMetric::allCostMetrics[u2]->style();
// Hand the query to the MDL layer and return its answer.
133 return (mdl_metric_data(metric_name, result));
136 // for non-aggregate metrics
// Constructor for a component mi. Records the owning process p, the metric
// name, the overall focus, the component focus, the component flat name and
// the aggregation style. The node starts with inserted_ and installed_ false,
// id_ = -1 and originalCost_ = 0.
137 metricDefinitionNode::metricDefinitionNode(process *p, const string& met_name,
138 const vector< vector<string> >& foc,
139 const vector< vector<string> >& component_foc,
140 const string& component_flat_name, int agg_style)
142 aggOp(agg_style), // CM5 metrics need aggOp to be set
143 inserted_(false), installed_(false), met_(met_name),
144 focus_(foc), component_focus(component_foc),
145 flat_name_(component_flat_name),
147 cumulativeValue_float(0.0),
148 id_(-1), originalCost_(0.0), proc_(p)
// Look up the metric's aggregate/style data into md; the bool result is kept
// in aflag.
152 aflag=mdl_internal_metric_data(met_name, md);
157 // for aggregate metrics
// Constructor for an aggregate mi built from the component mi's in `parts`.
// The node is marked aggregate_, has no process (proc_ is NULL), and starts
// with inserted_ and installed_ false, id_ = -1 and originalCost_ = 0.
158 metricDefinitionNode::metricDefinitionNode(const string& metric_name,
159 const vector< vector<string> >& foc,
160 const string& cat_name,
161 vector<metricDefinitionNode*>& parts,
163 : aggregate_(true), aggOp(agg_op), inserted_(false), installed_(false),
164 met_(metric_name), focus_(foc),
165 flat_name_(cat_name), components(parts),
167 cumulativeValue_float(0.0),
168 id_(-1), originalCost_(0.0), proc_(NULL)
// Link every component back to this aggregate: append `this` to the
// component's aggregators and a fresh aggSample component to its samples, so
// the two vectors grow in step.
170 unsigned p_size = parts.size();
171 for (unsigned u=0; u<p_size; u++) {
172 metricDefinitionNode *mi = parts[u];
173 mi->aggregators += this;
174 mi->samples += aggSample.newComponent();
// Tries to satisfy a metric request from the internal metrics and then from
// the cost metrics, matching on metric_name. On a match it either returns one
// of the sentinel pointers described below or allocates a new component-style
// metricDefinitionNode with a NULL process.
178 // check for "special" metrics that are computed directly by paradynd
179 // if a cost of an internal metric is asked for, enable=false
180 metricDefinitionNode *doInternalMetric(vector< vector<string> >& canon_focus,
181 vector< vector<string> >& component_canon_focus,
182 string& metric_name, string& flat_name,
183 bool enable, bool& matched)
185 // called by createMetricInstance, below.
187 // a valid metricDefinitionNode* when successful
188 // -1 --> enable was false
189 // -2 --> not legal to instrument this focus
190 // NULL --> a more serious error (probably metric-is-unknown)
193 metricDefinitionNode *mn = 0;
195 // check to see if this is an internal metric
196 unsigned im_size = internalMetric::allInternalMetrics.size();
197 for (unsigned im_index=0; im_index<im_size; im_index++){
198 internalMetric *theIMetric = internalMetric::allInternalMetrics[im_index];
199 if (theIMetric->name() == metric_name) {
// NOTE(review): the condition guarding this return is not visible here; per
// the contract above it should be the !enable case.
202 return (metricDefinitionNode*)-1;
204 if (!theIMetric->legalToInst(canon_focus))
205 // Paradyn will handle this case and report appropriate error msg
206 return (metricDefinitionNode*)-2;
208 mn = new metricDefinitionNode(NULL, metric_name, canon_focus,
209 component_canon_focus,
210 flat_name, theIMetric->aggregate());
// Register the new node with the internal metric.
213 theIMetric->enableNewInstance(mn);
218 // check to see if this is a cost metric
219 for (unsigned i=0; i < costMetric::allCostMetrics.size(); i++){
220 if(costMetric::allCostMetrics[i]->name() == metric_name){
222 if (!enable) return (metricDefinitionNode*)-1;
223 costMetric *nc = costMetric::allCostMetrics[i];
224 if (!nc->legalToInst(canon_focus)) return (metricDefinitionNode*)-2;
226 mn = new metricDefinitionNode(NULL, metric_name, canon_focus,
227 component_canon_focus,
228 flat_name, nc->aggregate());
237 // No matches found among internal or cost metrics
241 // the following should probably be made a public static member fn of class metric
// Builds a flat name string: starts from metricName and appends every
// component string of every hierarchy in canonFocus, in order.
// NOTE(review): the return statement is not visible in this excerpt;
// presumably `result` is returned.
242 string metricAndCanonFocus2FlatName(const string &metricName,
243 const vector< vector<string> > &canonFocus) {
244 string result = metricName;
246 for (unsigned hierarchy=0; hierarchy < canonFocus.size(); hierarchy++)
247 for (unsigned component=0; component < canonFocus[hierarchy].size();
249 result += canonFocus[hierarchy][component];
254 // the following should probably be made a public static member fn of class metric
// Converts a focus given as resource ids into canonical string form in two
// steps: resource::foc_to_strings() fills an intermediate focus, which
// resource::make_canonical() then rewrites into canonFocus.
255 static bool focus2CanonicalFocus(const vector<unsigned> &focus,
256 vector< vector<string> > &canonFocus,
258 // takes in "focus", writes to "canonFocus". Returns true iff successful.
259 // if "important" is false, don't print error msg on failure (called by guessCost();
260 // no mi is really being created)
262 vector< vector<string> > unCanonFocus;
263 if (!resource::foc_to_strings(unCanonFocus, focus, important)) { // writes to unCanonFocus
// NOTE(review): the condition guarding this message (presumably `important`)
// is not visible in this excerpt.
265 cerr << "focus2CanonicalFocus failed since resource::foc_to_strings failed" << endl;
269 resource::make_canonical(unCanonFocus, canonFocus);
// Writes a focus to `os`: every component of every hierarchy is printed with
// a leading '/'.
// NOTE(review): the statement executed between hierarchies (under the
// `a < focus.size()-1` test) is not visible in this excerpt.
274 static void print_focus(debug_ostream &os, vector< vector<string> > &focus) {
275 for (unsigned a=0; a < focus.size(); a++) {
276 for (unsigned b=0; b < focus[a].size(); b++)
277 os << '/' << focus[a][b];
// True for every hierarchy except the last one.
279 if (a < focus.size()-1)
// Finds or creates the metricDefinitionNode for `metric_name` at `focus`.
//   metric_name - name of the metric to instantiate
//   focus       - focus as a vector of resource ids; converted to canonical
//                 string form with focus2CanonicalFocus()
//   enable      - true if for real; false for guessCost()
// An mi already present in allMIs under the same flat name is returned as is.
// Otherwise the metric is built with mdl_do() when mdl_can_do() accepts it, or
// with doInternalMetric() for internal/cost metrics, whose -1/-2 sentinel
// results are converted back to NULL here.
// Only processes that have not exited, are past the neonatal state and have
// finished bootstrapping are handed to mdl_do().
285 metricDefinitionNode *createMetricInstance(string& metric_name,
286 vector<u_int>& focus,
287 bool enable, // true if for real; false for guessCost()
290 vector< vector<string> > canonicalFocus;
291 // we make third parameter false to avoid printing warning messages in
292 // focus2CanonicalFocus ("enable" was here previously) - naim
293 if (!focus2CanonicalFocus(focus, canonicalFocus, false)) {
294 //if (enable) cerr << "createMetricInstance failed because focus2CanonicalFocus failed" << endl;
298 //cerr << "createMetricInstance called. metric_name = " << \
299 // metric_name << endl;
300 //cerr << " canonicalFocus (derived from focus (u_int id array)) == " \
302 //for(unsigned z = 0; z < canonicalFocus.size(); z++) {
303 // vector<string> temp_strings = canonicalFocus[z];
304 // cerr << " canonicalFocus[" << z << "] : " << endl;
305 // for(unsigned y = 0; y < temp_strings.size(); y++) {
306 // cerr << " " << temp_strings[y] << endl;
310 string flat_name = metricAndCanonFocus2FlatName(metric_name, canonicalFocus);
311 //cerr << "flat_name = " << flat_name << endl;
314 // first see if it is already defined.
315 dictionary_hash_iter<unsigned, metricDefinitionNode*> mdi(allMIs);
318 * See if we can find the requested metric instance.
319 * Currently this is only used to cache structs built for cost requests
320 * which are then instantiated. This could be used as a general system
321 * to request find sub-metrics that are already defined and use them to
322 * reduce cost. This would require adding the components of an aggregate
323 * into the allMIs list since searching tends to be top down, not bottom
324 * up. This would also require adding a ref count to many of the structures
325 * so they only get deleted when we are really done with them.
329 // first see if it is already defined.
331 metricDefinitionNode *mi = mdi.currval();
333 if (mi->getFullName() == flat_name) {
334 metric_cerr << "createMetricInstance: mi with flat_name " << flat_name << " already exists! using it" << endl;
335 return mi; // this metricDefinitionNode has already been defined
339 //cerr << " previous instance of metric not found, trying to create" \
342 if (mdl_can_do(metric_name)) {
343 //cerr << " mdl_can_do(metrix_name) == TRUE" << endl;
346 /* select the processes that should be instrumented. We skip process
347 that have exited, and processes that have been created but are not
348 completely initialized yet.
349 If we try to insert instrumentation in a process that is not ready
350 yet, we get a core dump.
351 A process is ready when it is not in neonatal state and the
352 isBootstrappedYet returns true.
354 vector<process*> procs;
// Keep a process only when it is not exited, not neonatal, and bootstrapped.
// The previous test (== exited || == neonatal || isBootstrappedYet()) selected
// exactly the exited and neonatal processes that must be skipped.
356 for (unsigned u = 0; u < processVec.size(); u++) {
357 if (processVec[u]->status()!=exited && processVec[u]->status()!=neonatal
358 && processVec[u]->isBootstrappedYet())
359 procs += processVec[u];
362 if (procs.size() == 0) {
363 // there are no processes to instrument
364 //printf("createMetricInstance failed, no processes to instrument\n");
// computingCost is the inverse of enable: a cost estimate only, no real
// instrumentation request.
369 if (enable) computingCost = false;
370 else computingCost = true;
371 metricDefinitionNode *mi = mdl_do(canonicalFocus, metric_name, flat_name, procs, false,
374 //cerr << " mdl_do returned ";
376 // cerr << "NULL" << endl;
378 // cerr << "Non-NULL" << endl;
382 metric_cerr << "createMetricInstance failed since mdl_do failed" << endl;
383 metric_cerr << "metric name was " << metric_name << "; focus was ";
384 print_focus(metric_cerr, canonicalFocus);
388 //cerr << " mdl_can_do(metrix_name) == FALSE" << endl;
390 metricDefinitionNode *mi=doInternalMetric(canonicalFocus,
391 canonicalFocus, // is this right for component_canon_focus???
392 metric_name,flat_name,enable,matched);
393 // NULL on serious error; -1 if enable was false; -2 if illegal to instr with
394 // given focus [many internal metrics work only for whole program]
396 if (mi == (metricDefinitionNode*)-2) {
397 metric_cerr << "createMetricInstance: internal metric " << metric_name << " isn't defined for focus: ";
398 print_focus(metric_cerr, canonicalFocus);
399 mi = NULL; // straighten up the return value
401 else if (mi == (metricDefinitionNode*)-1) {
402 assert(!enable); // no error msg needed
403 mi = NULL; // straighten up the return value
405 else if (mi == NULL) {
406 // more serious error...do a printout
407 metric_cerr << "createMetricInstance failed since doInternalMetric failed" << endl;
408 metric_cerr << "metric name was " << metric_name << "; focus was ";
409 print_focus(metric_cerr, canonicalFocus);
418 // propagate this metric instance to process p.
419 // p is a process that started after the metric instance was created
420 // note: don't call this routine for a process started via fork or exec, just
421 // for processes started the "normal" way.
422 // "this" is an aggregate mi, not a component one.
// Outline of the visible code: check whether a component for p already
// exists; for MDL metrics, run mdl_do() for p alone to obtain a temporary
// aggregate with one component; adopt that component, insert and install its
// instrumentation, and raise the predicted cost if the new mi costs more.
424 void metricDefinitionNode::propagateToNewProcess(process *p) {
425 unsigned comp_size = components.size();
428 return; // if there are no components, shouldn't the mi be fried?
430 for (unsigned u = 0; u < comp_size; u++) {
431 if (components[u]->proc() == p) {
432 // The metric is already enabled for this process. This case can
433 // happen when we are starting several processes at the same time.
439 bool internal = false;
441 metricDefinitionNode *mi = NULL;
442 // an aggregate (not component) mi, though we know that it'll contain just
443 // one component. It's that one component that we're really interested in.
444 if (mdl_can_do(met_)) {
445 // Make the unique ID for this metric/focus visible in MDL.
446 string vname = "$globalId";
447 mdl_env::add(vname, false, MDL_T_INT);
448 mdl_env::set(this->getMId(), vname);
// Single-element process vector: instrument only the new process.
450 vector<process *> vp(1,p);
451 mi = mdl_do(focus_, met_, flat_name_, vp, false, false);
453 // internal and cost metrics don't need to be propagated (um, is this correct?)
457 if (mi) { // successfully created new mi
458 assert(mi->components.size() == 1);
459 metricDefinitionNode *theNewComponent = mi->components[0];
// Re-parent the component: slot 0 of its aggregators/samples is overwritten
// so that it points at this aggregate.
461 components += theNewComponent;
462 theNewComponent->aggregators[0] = this;
463 theNewComponent->samples[0] = aggSample.newComponent();
465 theNewComponent->insertInstrumentation();
466 theNewComponent->checkAndInstallInstrumentation();
// Only ever raise the recorded cost; the difference is added to the global
// predicted cost.
470 const float cost = mi->cost();
471 if (cost > originalCost_) {
472 currentPredictedCost += cost - originalCost_;
473 originalCost_ = cost;
476 mi->components.resize(0); // protect the new component
// Per-component half of exec handling. Re-runs mdl_do() for this component's
// process using the metric, focus and flat name of its first aggregator,
// splices the resulting component into every aggregator in place of `this`,
// then inserts and installs the new component's instrumentation.
481 metricDefinitionNode* metricDefinitionNode::handleExec() {
482 // called by handleExec(), below. See that routine for documentation.
483 // "this" is a component mi.
485 // If this component mi can be (re-)enabled in the new (post-exec) process, then do
486 // so. Else, remove the component mi from aggregators, etc. Returns new component
487 // mi if successful, NULL otherwise.
491 // How can we tell if the mi can be inserted into the "new" (post-exec) process?
492 // A component mi is basically a set of instReqNodes and dataReqNodes. The latter
493 // don't restrict what can be inserted (is this right?); the instReqNodes hold the
494 // key -- we should look at the functions (instPoint's) where code (whose contents
495 // are in AstNode's) would be inserted. Now clearly, the instPoint's must be
496 // checked -- if any one doesn't exist, then the instReqNode and hence the component
497 // mi doesn't belong in the post-exec process. But what about the AstNode's?
498 // Should the code that gets inserted be subject to a similar test? Probably, but
499 // we currently don't do it.
501 // BUT: Even if a process contains a function in both the pre-exec and post-exec
502 // stages, we must assume that the function is IN A DIFFERENT LOCATION IN
503 // THE ADDRESS SPACE. Ick. So the instPoint's can't be trusted and must
504 // be recalculated from scratch. In that regard, this routine is similar to
505 // propagateToNewProcess(), which propagates aggregate mi's to a brand new
506 // process (but which doesn't work for processes started via fork or exec).
507 // The lesson learned is to (ick, ick, ick) call mdl_do() all over again.
508 // This gets really confusing when you consider that a component mi can belong
509 // to several aggregate mi's (e.g. if we represent cpu time for proc 100 then
510 // we can belong to cpu/whole and cpu/proc-100); for which aggregate mi should
511 // we run mdl_do? Any will do, so we can pick arbitrarily (is this right?).
513 // QUESTION: What about internal or cost metrics??? They have aggregate and
514 // component mi's just like normal metrics, right? If that's so, then
515 // they must be propagated too! NOT YET IMPLEMENTED!!!
// The first aggregator is the arbitrary pick discussed above; indexing [0]
// requires at least one aggregator.
517 metricDefinitionNode *aggregateMI = this->aggregators[0];
518 metricDefinitionNode *resultCompMI = NULL; // so far...
520 const bool internal = !mdl_can_do(aggregateMI->met_);
522 return NULL; // NOT YET IMPLEMENTED
524 // try to propagate the mi
525 // note: the following code is mostly stolen from propagateToNewProcess(); blame
526 // it for any bugs :)
528 // Make the unique ID for this metric/focus visible in MDL. (?)
529 string vname = "$globalId";
530 mdl_env::add(vname, false, MDL_T_INT);
531 mdl_env::set(aggregateMI->getMId(), vname);
533 vector<process*> vp(1, this->proc());
534 metricDefinitionNode *tempAggMI = mdl_do(aggregateMI->focus_,
536 aggregateMI->flat_name_,
538 true, // fry existing component MI
540 if (tempAggMI == NULL)
541 return NULL; // failure
543 assert(tempAggMI->aggregate_);
545 // okay, it looks like we successfully created a new aggregate mi.
546 // Of course, we're just interested in the (single) component mi contained
547 // within it; it'll replace "this".
549 assert(tempAggMI->components.size() == 1);
550 resultCompMI = tempAggMI->components[0];
// Drop the links to the temporary aggregate; they are rebuilt below against
// the real aggregators.
552 resultCompMI->aggregators.resize(0);
553 resultCompMI->samples.resize(0);
555 // For each aggregator, go back and find where "this" was a component mi.
556 // When found, replace the ptr to "this" with "theNewComponent".
557 unsigned num_aggregators = aggregators.size();
558 assert(num_aggregators > 0);
559 for (unsigned agglcv=0; agglcv < num_aggregators; agglcv++) {
560 metricDefinitionNode *aggMI = aggregators[agglcv];
563 for (unsigned complcv=0; complcv < aggMI->components.size(); complcv++) {
564 if (aggMI->components[complcv] == this) {
565 aggMI->components[complcv] = resultCompMI;
567 resultCompMI->aggregators += aggMI;
568 resultCompMI->samples += aggMI->aggSample.newComponent();
// Retire the old component's sample slot in this aggregator.
570 aggMI->aggSample.removeComponent(this->samples[agglcv]);
579 // Now let's actually insert the instrumentation:
581 resultCompMI->insertInstrumentation();
582 resultCompMI->checkAndInstallInstrumentation();
585 // And fry "tempAggMI", but make sure "resultCompMI" isn't fried when we do so
586 tempAggMI->components.resize(0); // protect resultCompMI
587 delete tempAggMI; // good riddance; you were an ugly hack to begin with
// Static entry point for exec handling: snapshots all component mi's, then
// for each one that belongs to `proc` calls the member handleExec() and either
// removes the component (on NULL) or registers its replacement in
// allMIComponents.
592 void metricDefinitionNode::handleExec(process *proc) {
593 // a static member fn.
594 // handling exec is tricky. At the time this routine is called, the "new" process
595 // has been bootstrapped and is ready for stuff to get inserted. No mi's have yet
596 // been propagated, and the data structures (allMIs, allMIComponents, etc.) are still
597 // in their old, pre-exec state, so they show component mi's enabled for this
598 // process, even though they're not (at least not yet). This routines brings things
601 // Algorithm: loop thru all component mi's for this process. If it is possible to
602 // propagate it to the "new" (post-exec) process, then do so. If not, fry the
603 // component mi. An example where a component mi can no longer fit is an mi
604 // specific to, say, function foo(), which (thanks to the exec syscall) no longer
605 // exists in this process. Note that the exec syscall changed the addr space enough
606 // so even if a given routine foo() is present in both the pre-exec and post-exec
607 // process, we must assume that it has MOVED TO A NEW LOCATION, thus making
608 // the component mi's instReqNode's instPoint out-of-date. Ick.
610 // note the two loops; we can't safely combine into one since the second loop modifies
// First loop: copy every component mi pointer out of the dictionary.
612 vector<metricDefinitionNode*> allcomps;
613 for (dictionary_hash_iter<string,metricDefinitionNode*> iter=allMIComponents; iter; iter++)
614 allcomps += iter.currval();
// Second loop: process the snapshot; components of other processes are
// filtered by the proc() test.
616 for (unsigned i=0; i < allcomps.size(); i++) {
617 metricDefinitionNode* componentMI = allcomps[i];
618 if (componentMI->proc() != proc)
621 forkexec_cerr << "calling handleExec for component "
622 << componentMI->flat_name_ << endl;
624 metricDefinitionNode *replaceWithComponentMI = componentMI->handleExec();
626 if (replaceWithComponentMI == NULL) {
627 forkexec_cerr << "handleExec for component " << componentMI->flat_name_
628 << " failed, so not propagating it" << endl;
629 componentMI->removeThisInstance(); // propagation failed; fry component mi
632 forkexec_cerr << "handleExec for component " << componentMI->flat_name_
633 << " succeeded...it has been propagated" << endl;
634 // new component mi has already been inserted in place of old component mi
635 // in all of its aggregate's component lists. So, not much left to do,
636 // except to update allMIComponents.
638 assert(replaceWithComponentMI->flat_name_ == componentMI->flat_name_);
// Order matters: the old entry must be gone before the replacement is stored
// under the same flat name.
640 delete componentMI; // old component mi (dtor removes it from allMIComponents)
641 assert(!allMIComponents.defines(replaceWithComponentMI->flat_name_));
642 allMIComponents[replaceWithComponentMI->flat_name_] = replaceWithComponentMI;
647 // called when all components have been removed (because the processes have exited
648 // or exec'd) from "this". "this" is an aggregate mi.
// Flushes the remaining aggregated samples through batchSampleData() and
// flush_batch_buffer(), walks the traceOn dictionary to flush trace data, and
// finally reports tp->endOfDataCollection(id_).
649 void metricDefinitionNode::endOfDataCollection() {
650 assert(components.size() == 0);
652 // flush aggregateSamples
653 sampleInterval ret = aggSample.aggregateValues();
// NOTE(review): the loop header around the following statements is not
// visible in this excerpt; the trailing aggregateValues() call suggests they
// repeat while intervals remain -- confirm.
656 assert(ret.end > ret.start);
657 assert(ret.start >= (firstRecordTime/MILLION));
658 assert(ret.end >= (firstRecordTime/MILLION));
659 batchSampleData(id_, ret.start, ret.end, ret.value,
660 aggSample.numComponents(),false);
661 ret = aggSample.aggregateValues();
663 flush_batch_buffer();
664 // trace data streams
665 extern dictionary_hash<unsigned, unsigned> traceOn;
666 for (dictionary_hash_iter<unsigned,unsigned> iter=traceOn; iter; iter++) {
667 unsigned key = iter.currkey();
668 unsigned val = iter.currval();
// NOTE(review): the test on `val` that guards the following statements is not
// visible in this excerpt.
671 extern void batchTraceData(int, int, int, char *);
672 extern bool TRACE_BURST_HAS_COMPLETED;
673 TRACE_BURST_HAS_COMPLETED = true;
674 batchTraceData(0, key, 0, (char *)NULL);
678 tp->endOfDataCollection(id_);
681 // remove a component from an aggregate.
682 // "this" is an aggregate mi; "comp" is a component mi.
// Searches components for comp. When found, the component is deleted if
// deleteComp is set, its slot is overwritten with the last element and the
// vector shrinks by one, so the order of the remaining components is not
// preserved.
683 void metricDefinitionNode::removeFromAggregate(metricDefinitionNode *comp,
685 unsigned size = components.size();
686 for (unsigned u = 0; u < size; u++) {
687 if (components[u] == comp) {
688 if (deleteComp) delete components[u];
689 components[u] = NULL;
690 components[u] = components[size-1];
691 components.resize(size-1);
// NOTE(review): the condition guarding this call is not visible here;
// endOfDataCollection() asserts that no components remain.
693 endOfDataCollection();
698 // should always find the right component
702 // remove this component mi from all aggregators it is a component of.
703 // if the aggregate mi no longer has any components then fry the mi aggregate mi.
704 // called by removeFromMetricInstances, below, when a process exits (or exec's)
// Steps: drop this node's entry from allMIComponents if present, then for
// each aggregator remove this node's sample component and detach the node
// with removeFromAggregate(this, 0), i.e. without deleting it.
705 void metricDefinitionNode::removeThisInstance() {
708 // first, remove from allMIComponents (this is new --- is it right?)
709 if (allMIComponents.defines(flat_name_)) {
710 allMIComponents.undef(flat_name_);
// aggregators[u] and samples[u] are used as a pair below.
713 assert(aggregators.size() == samples.size());
715 for (unsigned u = 0; u < aggregators.size() && u < samples.size(); u++) {
716 aggregators[u]->aggSample.removeComponent(samples[u]);
717 aggregators[u]->removeFromAggregate(this, 0);
722 // Called when a process exits, to remove the component associated to proc
723 // from all metric instances. (If, after an exec, we never want to carry over
724 // mi's from the pre-exec, then this routine will work there, too. But we try to
725 // carry over mi's whenever appropriate.)
726 // Remove the aggregate metric instances that don't have any components left
// Snapshots allMIComponents into a vector, calls removeThisInstance() on each
// component whose proc() equals `proc`, then lets the cost metrics drop the
// process via costMetric::removeProcessFromAll().
727 void removeFromMetricInstances(process *proc) {
728 // Loop through all of the _component_ mi's; for each with component process
729 // of "proc", remove the component mi from its aggregate mi.
730 // Note: imho, there should be a *per-process* vector of mi-components.
732 // note 2 loops for safety (2d loop may modify dictionary?)
733 vector<metricDefinitionNode *> MIs;
734 for (dictionary_hash_iter<string,metricDefinitionNode*> iter=allMIComponents; iter; iter++)
735 MIs += iter.currval();
// removeThisInstance() undefines entries in allMIComponents, hence the
// iteration over the snapshot rather than the dictionary itself.
737 for (unsigned j = 0; j < MIs.size(); j++) {
738 if (MIs[j]->proc() == proc)
739 MIs[j]->removeThisInstance();
741 costMetric::removeProcessFromAll(proc); // what about internal metrics?
744 /* *************************************************************************** */
746 // obligatory definition of static member variable:
// Shared id counter: read and post-incremented by the add*Counter/add*Timer
// routines and by forkProcess() below.
747 int metricDefinitionNode::counterId=0;
// Creates a sampled int-counter data request for this mi, tagged with the
// current counterId, appends it to dataRequests, and advances counterId.
// NOTE(review): two alternative constructions are visible (shared-memory and
// non-shared-memory); the construct that selects between them, presumably a
// preprocessor conditional, is not visible in this excerpt. The remaining
// parameters and the return statement are also not visible.
749 dataReqNode *metricDefinitionNode::addSampledIntCounter(int initialValue,
753 dataReqNode *result=NULL;
756 // shared memory sampling of a reported intCounter
757 result = new sampledShmIntCounterReqNode(initialValue,
758 metricDefinitionNode::counterId,
759 this, computingCost, doNotSample);
760 // implicit conversion to base class
762 // non-shared-memory sampling of a reported intCounter
763 result = new sampledIntCounterReqNode(initialValue,
764 metricDefinitionNode::counterId,
765 this, computingCost);
766 // implicit conversion to base class
// The id used above is consumed; the file-level snapshot is kept in step.
771 metricDefinitionNode::counterId++;
773 internalMetricCounterId = metricDefinitionNode::counterId;
775 dataRequests += result;
// Creates a non-sampled int-counter data request (nonSampledIntCounterReqNode)
// with the given initial value and the current counterId, appends it to
// dataRequests, and advances counterId.
// NOTE(review): the return statement is not visible in this excerpt.
779 dataReqNode *metricDefinitionNode::addUnSampledIntCounter(int initialValue,
780 bool computingCost) {
781 // sampling of a non-reported intCounter (probably just a predicate)
782 // NOTE: In the future, we should probably put un-sampled intcounters
783 // into shared-memory when SHM_SAMPLING is defined. After all, the shared
784 // memory heap is faster.
785 dataReqNode *result = new nonSampledIntCounterReqNode
786 (initialValue, metricDefinitionNode::counterId,
787 this, computingCost);
788 // implicit conversion to base class
791 metricDefinitionNode::counterId++;
793 internalMetricCounterId = metricDefinitionNode::counterId;
795 dataRequests += result;
// Creates a wall-timer data request tagged with the current counterId,
// appends it to dataRequests, and advances counterId.
// NOTE(review): two alternative constructions are visible (shared-memory
// timer vs. sampledTimerReqNode with wallTime); the construct that selects
// between them is not visible in this excerpt, nor is the return statement.
799 dataReqNode *metricDefinitionNode::addWallTimer(bool computingCost) {
800 dataReqNode *result = NULL;
803 result = new sampledShmWallTimerReqNode(metricDefinitionNode::counterId, this, computingCost);
804 // implicit conversion to base class
806 result = new sampledTimerReqNode(wallTime, metricDefinitionNode::counterId, this, computingCost);
807 // implicit conversion to base class
812 metricDefinitionNode::counterId++;
814 internalMetricCounterId = metricDefinitionNode::counterId;
816 dataRequests += result;
// Creates a process-timer data request tagged with the current counterId,
// appends it to dataRequests, and advances counterId.
// NOTE(review): two alternative constructions are visible (shared-memory
// timer vs. sampledTimerReqNode with processTime); the construct that selects
// between them is not visible in this excerpt, nor is the return statement.
820 dataReqNode *metricDefinitionNode::addProcessTimer(bool computingCost) {
821 dataReqNode *result = NULL;
824 result = new sampledShmProcTimerReqNode(metricDefinitionNode::counterId, this, computingCost);
825 // implicit conversion to base class
827 result = new sampledTimerReqNode(processTime, metricDefinitionNode::counterId, this, computingCost);
828 // implicit conversion to base class
833 metricDefinitionNode::counterId++;
835 internalMetricCounterId = metricDefinitionNode::counterId;
837 dataRequests += result;
841 /* *************************************************************************** */
843 // called when a process forks (by handleFork(), below). "this" is a (component)
844 // mi in the parent process. Duplicate it for the child, with appropriate
845 // changes (i.e. the pid of the component focus name differs), and return the newly
846 // created child mi. "map" maps all instInstance's of the parent to those copied into
849 // Note how beautifully everything falls into place. Consider the case of alarm
850 // sampling with cpu/whole program. Then comes the fork. The parent process has
851 // (0) a tTimer structure allocated in a specific location in the inferior heap,
852 // (1) instrumentation @ main to call startTimer on that ptr, (2) instrumentation in
853 // DYNINSTsampleValues() to call DYNINSTreportTimer on that ptr.
854 // The child process of fork will have ALL of these things in the exact same locations,
855 // which is correct. We want the timer to be located in the same spot; we want
856 // DYNINSTreportTimer to be called on the same pointer; and main() hasn't moved.
858 // So there's not much to do here. We create a new component mi (with same flat name
859 // as in the parent, except for a different pid), and call "forkProcess" for all
860 // dataReqNodes and instReqNodes, none of which have to do anything titanic.
// Visible steps: rewrite the "Process" entry of the component focus from the
// parent's part name to the child's, derive the child's flat name, construct
// the child mi, register it in allMIComponents, duplicate every dataReqNode
// (each under a fresh counter id recorded in midToMiMap) and every
// instReqNode, and mark the child mi as inserted.
862 metricDefinitionNode *metricDefinitionNode::forkProcess(process *child,
863 const dictionary_hash<instInstance*,instInstance*> &map) const {
864 // The "focus_" member variable stays the same, because it was always for the
865 // metric as a whole, and not for some component.
867 // But two things must change, because they were component-specific (and the
868 // component has changed processes):
870 // (2) the component focus (not to be confused with plain focus_)
872 // For example, instead of
873 // "/Code/foo.c/myfunc, /Process/100, ...", we should have
874 // "/Code/foo.c/myfunc, /Process/101, ...", because the pid of the child
875 // differs from that of the parent.
877 // The resource structure of a given process is found in the "rid"
878 // field of class process.
879 const resource *parentResource = child->getParent()->rid;
880 const string &parentPartName = parentResource->part_name();
882 const resource *childResource = child->rid;
883 const string &childPartName = childResource->part_name();
885 vector< vector<string> > newComponentFocus = this->component_focus;
886 // we'll change the process, but not the machine name.
887 bool foundProcess = false;
// Locate the hierarchy whose first element is "Process"; it must have exactly
// two elements and name the parent process.
889 for (unsigned hier=0; hier < component_focus.size(); hier++) {
890 if (component_focus[hier][0] == "Process") {
892 assert(component_focus[hier].size() == 2);
893 // since a component focus is by definition specific to some process
895 assert(component_focus[hier][1] == parentPartName);
897 // change the process:
898 newComponentFocus[hier][1] = childPartName;
902 assert(foundProcess);
904 string newComponentFlatName = metricAndCanonFocus2FlatName(met_, newComponentFocus);
906 metricDefinitionNode *mi =
907 new metricDefinitionNode(child,
908 met_, // metric name doesn't change
909 focus_, // focus doesn't change (tho component focus will)
910 newComponentFocus, // this is a change
911 newComponentFlatName, // this is a change
916 metricDefinitionNode::counterId++;
918 forkexec_cerr << "metricDefinitionNode::forkProcess -- component flat name for parent is " << flat_name_ << "; for child is " << mi->flat_name_ << endl;
920 internalMetricCounterId = metricDefinitionNode::counterId;
// The child's flat name must not already be registered.
922 assert(!allMIComponents.defines(newComponentFlatName));
923 allMIComponents[newComponentFlatName] = mi;
925 // Duplicate the dataReqNodes:
926 for (unsigned u1 = 0; u1 < dataRequests.size(); u1++) {
927 // must add to midToMiMap[] before dup() to avoid some assert fails
928 const int newCounterId = metricDefinitionNode::counterId++;
929 // no relation to mi->getMId();
930 forkexec_cerr << "forked dataReqNode going into midToMiMap with id " << newCounterId << endl;
931 assert(!midToMiMap.defines(newCounterId));
932 midToMiMap[newCounterId] = mi;
934 dataReqNode *newNode = dataRequests[u1]->dup(child, mi, newCounterId, map);
935 // remember, dup() is a virtual fn, so the right dup() and hence the
936 // right fork-ctor is called.
939 mi->dataRequests += newNode;
942 // Duplicate the instReqNodes:
943 for (unsigned u2 = 0; u2 < instRequests.size(); u2++) {
944 mi->instRequests += instReqNode::forkProcess(instRequests[u2], map);
// Set directly, without calling insertInstrumentation() on the child mi.
947 mi->inserted_ = true;
// Removes from the child process the instrumentation that fork() copied for this
// (parent-side) component.
//   map                -- parent instInstance* -> child instInstance*; passed to each unFork()
//   unForkInstRequests -- when true, every entry of instRequests is unforked
//   unForkDataRequests -- when true, every entry of dataRequests is unforked
// `result` is set to false as soon as any individual unFork() call reports failure.
// NOTE(review): presumably `result` starts out true and is the return value; confirm.
952 bool metricDefinitionNode::unFork(dictionary_hash<instInstance*, instInstance*> &map,
953 bool unForkInstRequests,
954 bool unForkDataRequests) {
955 // see below handleFork() for explanation of why this routine is needed.
956 // "this" is a component mi for the parent process; we need to remove copied
957 // instrumentation from the _child_ process.
958 // Returns true iff the instrumentation was removed in the child (would be false
959 // if it's not safe to remove the instrumentation in the child because it was
962 // "map" maps instInstances from the parent process to instInstances in the child
965 // We loop thru the instReqNodes of the parent process, unforking each.
966 // In addition, we need to unfork the dataReqNodes, because the alarm-sampled
967 // ones instrument DYNINSTsampleValues.
971 if (unForkInstRequests)
972 for (unsigned lcv=0; lcv < instRequests.size(); lcv++)
973 if (!instRequests[lcv].unFork(map))
974 result = false; // failure
976 if (unForkDataRequests)
977 for (unsigned lcv=0; lcv < dataRequests.size(); lcv++)
978 if (!dataRequests[lcv]->unFork(map))
979 result = false; // failure
985 // called by forkProcess of context.C, just after the fork-constructor was
986 // called for the child process.
// Walks every component in allMIComponents and decides, per component, whether to
// (a) create a child copy with forkProcess() and attach it to the aggregators that
//     are not refined to a process, or
// (b) remove the residue fork() left in the child via unFork().
//   parent -- the forking process; compared against comp->proc()
//   child  -- the new process; handed to forkProcess()
//   map    -- parent instInstance* -> child instInstance*; passed to unFork()/forkProcess()
987 void metricDefinitionNode::handleFork(const process *parent, process *child,
988 dictionary_hash<instInstance*,instInstance*> &map) {
989 // "map" defines a mapping from all instInstance's of the parent process to
990 // the copied one in the child process. Some of the child process's ones may
991 // get fried by this routine, as it detects that instrumentation has been copied
992 // (by the fork syscall, which we have no control over) which doesn't belong in
993 // the child process and therefore gets deleted manually.
995 // Remember that a given component can be shared by multiple aggregator-mi's,
996 // so be careful about duplicating a component twice. Since we loop through
997 // component mi's instead of aggregate mi's, it's no problem. Note that it's
998 // possible that only a subset of a component-mi's aggregators should get the newly
999 // created child component mi.
1001 // 2 loops for safety (2d loop may modify dictionary?)
// First loop: snapshot the current components, since forkProcess() (called in the
// second loop) adds entries to allMIComponents.
1002 vector<metricDefinitionNode *> allComponents;
1003 for (dictionary_hash_iter<string,metricDefinitionNode*> iter=allMIComponents; iter; iter++)
1004 allComponents += iter.currval();
1006 for (unsigned complcv=0; complcv < allComponents.size(); complcv++) {
1007 metricDefinitionNode *comp = allComponents[complcv];
1009 // duplicate the component (create a new one) if it belongs in the
1010 // child process. It belongs if any of its aggregate mi's should be
1011 // propagated to the child process. An aggregate mi should be propagated
1012 // if it wasn't refined to some process.
1014 bool shouldBePropagated = false; // so far
1015 bool shouldBeUnforkedIfNotPropagated = false; // so far
1016 assert(comp->aggregators.size() > 0);
1017 for (unsigned agglcv1=0; agglcv1 < comp->aggregators.size(); agglcv1++) {
1018 metricDefinitionNode *aggMI = comp->aggregators[agglcv1];
// A process-hierarchy focus of size 1 is treated as "not refined to a process".
1020 if (aggMI->focus_[resource::process].size() == 1) {
1021 // wasn't specific to any process
1022 shouldBeUnforkedIfNotPropagated = false; // we'll definitely be using it
1023 shouldBePropagated = true;
1026 else if (comp->proc() == parent)
1027 // was specific to parent process, so fork() copied it into the child,
1028 // unless it was an internal or cost metric, in which case there was nothing
1029 // for fork to copy.
1030 if (!internalMetric::isInternalMetric(aggMI->getMetName()) &&
1031 !costMetric::isCostMetric(aggMI->getMetName()))
1032 shouldBeUnforkedIfNotPropagated = true;
1034 // was specific to other process, so nothing is in the child for it yet
1038 if (!shouldBePropagated && shouldBeUnforkedIfNotPropagated) {
1039 // this component mi isn't gonna be propagated to the child process, but
1040 // the fork syscall left some residue in the child. Delete that residue now.
1041 assert(comp->proc() == parent);
// The bool result of unFork() is not examined here.
1042 comp->unFork(map, true, true); // also modifies 'map' to remove items
1045 if (!shouldBePropagated)
1048 // Okay, it's time to propagate this component mi to the subset of its aggregate
1049 // mi's which weren't refined to a specific process. If we've gotten to this
1050 // point, then there _is_ at least one such aggregate.
1051 assert(shouldBePropagated);
1052 metricDefinitionNode *newComp = comp->forkProcess(child, map);
1053 // copies instr (well, fork() does this for us), allocs ctr/timer space,
1054 // initializes. Basically, copies dataReqNode's and instReqNode's.
1056 bool foundAgg = false;
1057 for (unsigned agglcv2=0; agglcv2 < comp->aggregators.size(); agglcv2++) {
1058 metricDefinitionNode *aggMI = comp->aggregators[agglcv2];
1059 if (aggMI->focus_[resource::process].size() == 1) {
1060 // this aggregate mi wasn't specific to any process, so it gets the new
// Link both directions and give the new component a sample slot in this aggregate,
// keeping newComp->aggregators and newComp->samples the same length.
1062 aggMI->components += newComp;
1063 newComp->aggregators += aggMI;
1064 newComp->samples += aggMI->aggSample.newComponent();
// Reports whether manual triggering is needed for this metric instance.
// Two checks are visible: a recursive query of each entry in `components`, and a
// query of each entry in `instRequests`.
// NOTE(review): the branching between the two loops and the return statements are
// not shown here; presumably returns true on the first hit and false otherwise.
1072 bool metricDefinitionNode::anythingToManuallyTrigger() const {
1074 for (unsigned i=0; i < components.size(); i++)
1075 if (components[i]->anythingToManuallyTrigger())
1080 for (unsigned i=0; i < instRequests.size(); i++)
1081 if (instRequests[i].anythingToManuallyTrigger())
// Triggers every instrumentation request that reports it has something to manually
// trigger. Recurses into `components`, and calls triggerNow() on this node's own
// `instRequests`.
//   parentMId -- forwarded unchanged to the components and to triggerNow()
// Precondition (asserted): anythingToManuallyTrigger() is true.
// A failed triggerNow() is reported on cerr.
1089 void metricDefinitionNode::manuallyTrigger(int parentMId) {
1090 assert(anythingToManuallyTrigger());
1093 for (unsigned i=0; i < components.size(); i++)
1094 if (components[i]->anythingToManuallyTrigger())
1095 components[i]->manuallyTrigger(parentMId);
1098 for (unsigned i=0; i < instRequests.size(); i++)
1099 if (instRequests[i].anythingToManuallyTrigger()) {
1100 if (!instRequests[i].triggerNow(proc(),parentMId)) {
1101 cerr << "manual trigger failed for an inst request" << endl;
1108 // startCollecting is called by dynRPC::enableDataCollection (or enableDataCollection2)
1110 // startCollecting is a friend of metricDefinitionNode; can it be
1111 // made a member function of metricDefinitionNode instead?
1112 // Especially since it clearly is an integral part of the class;
1113 // in particular, it sets the crucial variable "id_"
// Visible sequence: publish `id` to MDL as "$globalId", build the metric instance with
// createMetricInstance(), register it in allMIs under mi->id_, record its cost and add
// it to currentPredictedCost, pause the running processes of its components, insert
// and install the instrumentation, manually trigger where needed, then bump
// metResPairsEnabled.
//   metric_name, focus -- identify the metric/focus pair to enable
//   id                 -- caller-supplied unique id for this pair
//   procsToCont        -- see the comment at the pause loop below
// NOTE(review): the return statements are not visible here; confirm the value
// returned on success and on createMetricInstance() failure.
1114 int startCollecting(string& metric_name, vector<u_int>& focus, int id,
1115 vector<process *> &procsToCont)
1117 bool internal = false;
1119 // Make the unique ID for this metric/focus visible in MDL.
1120 string vname = "$globalId";
1121 mdl_env::add(vname, false, MDL_T_INT);
1122 mdl_env::set(id, vname);
1124 metricDefinitionNode *mi = createMetricInstance(metric_name, focus,
1128 //cerr << "startCollecting for " << metric_name << " failed because createMetricInstance failed" << endl;
// Each id may be registered in allMIs only once.
1134 assert(!allMIs.defines(mi->id_));
1135 allMIs[mi->id_] = mi;
// Remember the cost at enable time and fold it into the global prediction.
1137 const float cost = mi->cost();
1138 mi->originalCost_ = cost;
1140 currentPredictedCost += cost;
1143 // enable timing stuff: also code in insertInstrumentation()
1144 u_int start_size = test_heapsize;
1145 printf("ENABLE: %d %s %s\n",start_size,
1146 (mi->getMetName()).string_of(),
1147 (mi->getFullName()).string_of());
1148 static timer inTimer;
1155 // pause processes that are running and add them to procsToCont.
1156 // We don't rerun the processes after we insert instrumentation,
1157 // this will be done by our caller, after all instrumentation
1158 // has been inserted.
1159 for (unsigned u = 0; u < mi->components.size(); u++) {
1160 process *p = mi->components[u]->proc();
1161 if (p->status() == running && p->pause()) {
// The return value of insertInstrumentation() is not examined on the next line.
1167 mi->insertInstrumentation(); // calls pause and unpause (this could be a bug, since the next line should be allowed to execute before the unpause!!!)
1168 mi->checkAndInstallInstrumentation();
1170 // Now that the timers and counters have been allocated on the heap, and
1171 // the instrumentation added, we can manually execute instrumentation
1172 // we may have missed at $start.entry. But has the process been paused
1173 // all this time? Hopefully so; otherwise things can get screwy.
1175 if (mi->anythingToManuallyTrigger()) {
// Only the first component's process is consulted for the running/continue decision.
1176 process *theProc = mi->components[0]->proc();
1179 bool alreadyRunning = (theProc->status_ == running);
1184 mi->manuallyTrigger(id);
1187 theProc->continueProc(); // the continue will trigger our code
1189 ; // the next time the process continues, we'll trigger our code
1195 if(!start_size) start_size = test_heapsize;
1196 printf("It took %f:user %f:system %f:wall seconds heap_left: %d used %d\n"
1197 , inTimer.usecs(), inTimer.ssecs(), inTimer.wsecs(),
1198 test_heapsize,start_size-test_heapsize);
1201 metResPairsEnabled++;
// Estimates the cost of enabling metric_name on focus without enabling it:
// builds a metric instance with createMetricInstance() and asks it for cost().
// The instance is treated as temporary unless its id is already registered in allMIs.
// NOTE(review): the failure path and return statements are not visible here; the
// commented-out log line suggests 0.0 is returned when creation fails -- confirm.
1205 float guessCost(string& metric_name, vector<u_int>& focus) {
1206 // called by dynrpc.C (getPredictedDataCost())
1208 metricDefinitionNode *mi = createMetricInstance(metric_name, focus, false, internal);
1210 //metric_cerr << "guessCost returning 0.0 since createMetricInstance failed" << endl;
1214 float cost = mi->cost();
1215 // delete the metric instance, if it is not being used
1216 if (!allMIs.defines(mi->getMId()))
// Inserts the instrumentation for this metric instance.
// Visible work: recurse into every entry of `components` (failing fast on the first
// failure); pause proc_; register each dataReqNode's sample id in midToMiMap; call
// insertInstrumentation() on each instReqNode, collecting the resulting
// returnInstance pointers in returnInsts; continue proc_.
// NOTE(review): the branching between the component loop and the per-process work,
// and the conditions guarding continueProc(), are not visible here.
1222 bool metricDefinitionNode::insertInstrumentation()
1224 // returns true iff successful
1231 unsigned c_size = components.size();
1232 for (unsigned u=0; u<c_size; u++)
1233 if (!components[u]->insertInstrumentation())
1234 return false; // shouldn't we try to undo what's already put in?
// Remember whether the process was running so it can be resumed afterwards.
1236 bool needToCont = proc_->status() == running;
1237 bool res = proc_->pause();
1241 // Loop thru "dataRequests", an array of (ptrs to) dataReqNode:
1242 // Here we allocate ctrs/timers in the inferior heap but don't
1243 // stick in any code, except (if appropriate) that we'll instrument the
1244 // application's alarm-handler when not shm sampling.
1245 unsigned size = dataRequests.size();
1246 for (unsigned u=0; u<size; u++) {
1247 // the following allocs an object in inferior heap and arranges for
1248 // it to be alarm sampled, if appropriate.
1249 // Note: this is not necessary anymore because we are allocating the
1250 // space when the constructor for dataReqNode is called. This was
1251 // done for the dyninstAPI - naim 2/18/97
1252 //if (!dataRequests[u]->insertInstrumentation(proc_, this))
1253 // return false; // shouldn't we try to undo what's already put in?
// Map the sample id to this node; each id may be mapped only once.
1255 unsigned mid = dataRequests[u]->getSampleId();
1256 assert(!midToMiMap.defines(mid));
1257 midToMiMap[mid] = this;
1260 // Loop thru "instRequests", an array of instReqNode:
1261 // (Here we insert code instrumentation, tramps, etc. via addInstFunc())
1262 for (unsigned u1=0; u1<instRequests.size(); u1++) {
1263 // NEW: the following may also manually trigger the instrumentation
1265 returnInstance *retInst=NULL;
// NOTE(review): this early return leaves proc_ paused even if needToCont is true.
1266 if (!instRequests[u1].insertInstrumentation(proc_, retInst))
1267 return false; // shouldn't we try to undo what's already put in?
1270 returnInsts += retInst;
1274 proc_->continueProc();
// Installs the jumps to the base trampolines recorded in returnInsts, delaying any
// whose patch site is reported unsafe by checkReturnInstance() against the current
// stack walk. Delayed entries are queued with addToReturnWaitingList().
// Returns true immediately when installed_ is already set.
// NOTE(review): other return paths are not visible here.
1280 bool metricDefinitionNode::checkAndInstallInstrumentation() {
1281 // Patch up the application to make it jump to the base trampoline(s) of this
1282 // metric. (The base trampoline and mini-tramps have already been installed
1283 // in the inferior heap). We must first check to see if it's safe to install by
1284 // doing a stack walk, and determining if anything on it overlaps with any of our
1285 // desired jumps to base tramps.
1286 // The key variable is "returnInsts", which was created for us when the base
1287 // tramp(s) were created. Essentially, it contains the details of how we'll jump
1288 // to the base tramp (where in the code to patch, how many instructions, the
1289 // instructions themselves).
1290 // Note that it seems this routine is misnamed: it's not instrumentation that needs
1291 // to be installed (the base & mini tramps are already in place); it's just the
1292 // last step that is still needed: the jump to the base tramp.
1293 // If one or more can't be added, then a TRAP insn is inserted in the closest
1294 // common safe return point along the stack walk, and some structures are appended
1295 // to the process' "wait list", which is then checked when a TRAP signal arrives.
1296 // At that time, the jump to the base tramp is finally done. WARNING: It seems to
1297 // me that such code isn't thread-safe...just because one thread hits the TRAP,
1298 // there may still be other threads that are unsafe. It seems to me that we should
1299 // be doing this check again when a TRAP arrives...but for each thread (right now,
1300 // there's no stack walk for other threads). --ari
1302 bool needToCont = false;
1304 if (installed_) return(true);
1309 unsigned c_size = components.size();
1310 for (unsigned u=0; u<c_size; u++)
1311 components[u]->checkAndInstallInstrumentation();
1312 // why no checking of the return value?
// Pause the process for the stack walk; remember whether to resume it afterwards.
1314 needToCont = proc_->status() == running;
1315 if (!proc_->pause()) {
1316 cerr << "checkAnd... pause failed" << endl; cerr.flush();
1320 vector<Address> pc = proc_->walkStack();
1321 // ndx 0 is where the pc is now; ndx 1 is the call site;
1322 // ndx 2 is the call site's call site, etc...
1324 // for(u_int i=0; i < pc.size(); i++){
1325 // printf("frame %d: pc = 0x%x\n",i,pc[i]);
1328 unsigned rsize = returnInsts.size();
1329 u_int max_index = 0; // first frame where it is safe to install instr
1330 bool delay_install = false; // true if some instr. needs to be delayed
1331 vector<bool> delay_elm(rsize); // wch instr. to delay
1332 // for each inst point walk the stack to determine if it can be
1333 // inserted now (it can if it is not currently on the stack)
1334 // If some can not be inserted, then find the first safe point on
1335 // the stack where all can be inserted, and set a break point
1336 for (unsigned u=0; u<rsize; u++) {
1338 bool installSafe = returnInsts[u] -> checkReturnInstance(pc,index);
1339 // if unsafe, index will be set to the first unsafe stack walk ndx
1340 // (0 being top of stack; i.e. the current pc)
1342 if (!installSafe && index > max_index)
1346 returnInsts[u] -> installReturnInstance(proc_);
1347 delay_elm[u] = false;
1349 delay_install = true;
1350 delay_elm[u] = true;
1354 if (delay_install) {
1355 // get rid of pathological cases...caused by threaded applications
1356 // TODO: this should be fixed to do something smarter
1357 if(max_index > 0 && max_index+1 >= pc.size()){
1359 //printf("max_index changed: %d\n",max_index);
1361 if(max_index > 0 && pc[max_index+1] == 0){
1363 //printf("max_index changed: %d\n",max_index);
// NOTE(review): pc[max_index+1] is read here and in the test above; when max_index
// is 0 the guards above do not apply, so confirm pc always has at least two entries
// on this path.
1365 Address pc2 = pc[max_index+1];
1366 for (u_int i=0; i < rsize; i++)
1368 returnInsts[i]->addToReturnWaitingList(pc2, proc_);
1371 if (needToCont) proc_->continueProc();
// Computes the cost figure for this metric instance.
// Over `components` the running value `ret` becomes the maximum of the component
// costs; over `instRequests` each request's cost(proc_) is added to `ret`.
// NOTE(review): the declaration of `ret`, the branching between the two loops and
// the return statement are not visible here; presumably `ret` starts at 0 and is returned.
1376 float metricDefinitionNode::cost() const
1380 unsigned c_size = components.size();
1381 for (unsigned u=0; u<c_size; u++) {
1382 float nc = components[u]->cost();
1383 if (nc > ret) ret = nc;
1386 for (unsigned u=0; u<instRequests.size(); u++)
1387 ret += instRequests[u].cost(proc_);
// Disables this metric instance.
// Visible stages:
//   1. offer the node to every internal metric (disableByMetricDefinitionNode);
//   2. disable any cost metric whose `node` is this object;
//   3. return early when nothing was inserted (inserted_ is false);
//   4. detach this node from each component's aggregators/samples lists;
//   5. disable each instReqNode, then each dataReqNode, and drop the data request's
//      sample id from midToMiMap.
// NOTE(review): the control flow joining these stages (returns after stages 1/2,
// aggregate-vs-component branching) is not visible here.
1392 void metricDefinitionNode::disable()
1394 // check for internal metrics
1396 unsigned ai_size = internalMetric::allInternalMetrics.size();
1397 for (unsigned u=0; u<ai_size; u++) {
1398 internalMetric *theIMetric = internalMetric::allInternalMetrics[u];
1399 if (theIMetric->disableByMetricDefinitionNode(this)) {
1400 //logLine("disabled internal metric\n");
1405 // check for cost metrics
1406 for (unsigned i=0; i<costMetric::allCostMetrics.size(); i++){
1407 if (costMetric::allCostMetrics[i]->node == this) {
1408 costMetric::allCostMetrics[i]->disable();
1409 //logLine("disabled cost metric\n");
1413 if (!inserted_) return;
1417 /* disable components of aggregate metrics */
1418 for (unsigned u=0; u<components.size(); u++) {
1419 metricDefinitionNode *m = components[u];
1420 unsigned aggr_size = m->aggregators.size();
// aggregators and samples are parallel vectors and must stay the same length.
1421 assert(aggr_size == m->samples.size());
1422 for (unsigned u1=0; u1 < aggr_size; u1++) {
1423 if (m->aggregators[u1] == this) {
// Remove entry u1 by overwriting it with the last entry and shrinking by one;
// element order is not preserved.
1424 m->aggregators[u1] = m->aggregators[aggr_size-1];
1425 m->aggregators.resize(aggr_size-1);
1426 m->samples[u1] = m->samples[aggr_size-1];
1427 m->samples.resize(aggr_size-1);
1432 assert(m->aggregators.size() == aggr_size-1);
1434 // disable component only if it is not being shared
1435 if (aggr_size == 1) {
// Collect, per inst request, the tramps at its point; the accumulated list is later
// passed to each dataReqNode's disable().
1441 vector<unsigVecType> pointsToCheck;
1442 for (unsigned u1=0; u1<instRequests.size(); u1++) {
1443 unsigVecType pointsForThisRequest =
1444 getAllTrampsAtPoint(instRequests[u1].getInstance());
1445 pointsToCheck += pointsForThisRequest;
1447 instRequests[u1].disable(pointsForThisRequest); // calls deleteInst()
1450 for (unsigned u=0; u<dataRequests.size(); u++) {
// Read the sample id before calling disable() on the request.
1451 unsigned mid = dataRequests[u]->getSampleId();
1452 dataRequests[u]->disable(proc_, pointsToCheck); // deinstrument
1453 assert(midToMiMap.defines(mid));
1454 midToMiMap.undef(mid);
// Detaches this node from comp's list of aggregators (and the parallel samples list).
//   comp -- a component node; asserted not to be an aggregate
// `found` starts at aggr_size as a "not found" sentinel.
// NOTE(review): the line that records the matching index, and the bodies of the
// `aggr_size == 0`, `found == aggr_size` and final `aggr_size == 1` branches, are not
// visible here; the last one is where the component was solely owned by this node.
1459 void metricDefinitionNode::removeComponent(metricDefinitionNode *comp) {
1460 assert(!comp->aggregate_);
1461 unsigned aggr_size = comp->aggregators.size();
1462 unsigned found = aggr_size;
1464 if (aggr_size == 0) {
1469 // component has more than one aggregator. Remove this from list of aggregators
1470 for (unsigned u = 0; u < aggr_size; u++) {
1471 if (comp->aggregators[u] == this) {
1476 if (found == aggr_size)
1478 assert(found < aggr_size);
1479 assert(aggr_size == comp->samples.size());
// Remove entry `found` by moving the last entry into its slot and shrinking by one;
// applied identically to both parallel vectors.
1480 comp->aggregators[found] = comp->aggregators[aggr_size-1];
1481 comp->aggregators.resize(aggr_size-1);
1482 comp->samples[found] = comp->samples[aggr_size-1];
1483 comp->samples.resize(aggr_size-1);
1485 if (aggr_size == 1) {
// Destructor. Visible work: hand every component to removeComponent() rather than
// deleting it directly, clear `components`, unregister flat_name_ from
// allMIComponents, and delete every owned dataReqNode.
// NOTE(review): any aggregate-vs-component branching around these steps is not visible here.
1492 metricDefinitionNode::~metricDefinitionNode()
1495 /* delete components of aggregate metrics */
1496 unsigned c_size = components.size();
1497 for (unsigned u=0; u<c_size; u++)
1498 removeComponent(components[u]);
1499 //delete components[u];
1500 components.resize(0);
1502 allMIComponents.undef(flat_name_);
1503 for (unsigned u=0; u<dataRequests.size(); u++) {
1504 delete dataRequests[u];
1506 dataRequests.resize(0);
// Calls disable() on every dataReqNode of this node, passing an empty pointsToCheck list.
// Unlike metricDefinitionNode::disable(), nothing is removed from midToMiMap in the
// lines visible here.
1510 void metricDefinitionNode::cleanup_drn()
1512 // we assume that it is safe to delete a dataReqNode at this point,
1513 // otherwise, we would need to do something similar as in the disable
1514 // method for metricDefinitionNode - naim
1515 vector<unsigVecType> pointsToCheck;
1516 for (unsigned u=0; u<dataRequests.size(); u++) {
1517 dataRequests[u]->disable(proc_, pointsToCheck); // deinstrument
1521 // NOTE: This stuff (flush_batch_buffer() and batchSampleData()) belongs
1522 // in perfStream.C; this is an inappropriate file.
1524 //////////////////////////////////////////////////////////////////////////////
1525 // Buffer the samples before we actually send it //
1526 // Send it when the buffers are full //
1527 // or, send it when the last sample in the interval has arrived. //
1528 //////////////////////////////////////////////////////////////////////////////
// Capacity of the sample batch: the number of batch_buffer_entry records that fit
// in 1024 bytes.
1530 const unsigned SAMPLE_BUFFER_SIZE = (1*1024)/sizeof(T_dyninstRPC::batch_buffer_entry);
1531 bool BURST_HAS_COMPLETED = false;
1532 // set to true after a burst (after a processTraceStream(), or sampleNodes for
1533 // the CM5), which will force the buffer to be flushed before it fills up
1534 // (if not, we'd have bad response time)
// Fixed-size staging area for samples awaiting transmission.
1536 vector<T_dyninstRPC::batch_buffer_entry> theBatchBuffer (SAMPLE_BUFFER_SIZE);
// Index of the next free slot in theBatchBuffer (equivalently, the number of
// buffered entries).
1537 unsigned int batch_buffer_next=0;
1539 // The following routines (flush_batch_buffer() and batchSampleData()) are
1540 // in an inappropriate src file...move somewhere more appropriate
// Sends the buffered samples in a single batchSampleDataCallbackFunc() call and
// resets the buffer. Does nothing useful when the buffer is empty (see the check on
// batch_buffer_next below). Afterwards BURST_HAS_COMPLETED is cleared and
// batch_buffer_next is reset to 0.
1541 void flush_batch_buffer() {
1542 // don't need to flush if the batch had no data (this does happen; see
1544 if (batch_buffer_next == 0)
1547 // alloc buffer of the exact size to make communication
1548 // more efficient. Why don't we send theBatchBuffer with a count?
1549 // This would work but would always (in the igen call) copy the entire
1550 // vector. This solution has the downside of calling new but is not too bad
1552 vector<T_dyninstRPC::batch_buffer_entry> copyBatchBuffer(batch_buffer_next);
1553 assert(copyBatchBuffer.size() <= theBatchBuffer.size());
1554 for (unsigned i=0; i< batch_buffer_next; i++) {
1555 copyBatchBuffer[i] = theBatchBuffer[i];
// t1/t2 bracket the igen call so that a slow call can be reported below.
1560 t1=getCurrentTime(false);
1563 // Now let's do the actual igen call!
1564 tp->batchSampleDataCallbackFunc(0, copyBatchBuffer);
1567 t2=getCurrentTime(false);
1568 if ((float)(t2-t1) > 15.0) {
// NOTE(review): the "size=%d" conversion receives a sizeof result (a size_t), which
// does not match %d where size_t is wider than int.
1569 sprintf(errorLine,"++--++ TEST ++--++ batchSampleDataCallbackFunc took %5.2f secs, size=%d, Kbytes=%5.2f\n",(float)(t2-t1),sizeof(T_dyninstRPC::batch_buffer_entry),(float)(sizeof(T_dyninstRPC::batch_buffer_entry)*copyBatchBuffer.size()/1024.0));
1574 BURST_HAS_COMPLETED = false;
1575 batch_buffer_next = 0;
// Queues one sample in theBatchBuffer, flushing first when the buffer is full or
// when BURST_HAS_COMPLETED is set.
//   mid                          -- metric id the sample belongs to
//   startTimeStamp, endTimeStamp -- interval covered by the sample
//   value, val_weight            -- stored in the entry's value / weight fields
//   internal_metric              -- stored in the entry's internal_met field
// NOTE(review): no store of `mid` into the entry is visible among these lines
// (it is only used in the log message); confirm it is recorded.
1578 void batchSampleData(int mid, double startTimeStamp,
1579 double endTimeStamp, double value, unsigned val_weight,
1580 bool internal_metric)
1582 // This routine is called where we used to call tp->sampleDataCallbackFunc.
1583 // We buffer things up and eventually call tp->batchSampleDataCallbackFunc
1586 char myLogBuffer[120] ;
1587 sprintf(myLogBuffer, "mid %d, value %g\n", mid, value) ;
1588 logLine(myLogBuffer) ;
1591 // Flush the buffer if (1) it is full, or (2) for good response time, after
1593 if (batch_buffer_next >= SAMPLE_BUFFER_SIZE || BURST_HAS_COMPLETED)
1594 flush_batch_buffer();
// Flushing before the store keeps batch_buffer_next below SAMPLE_BUFFER_SIZE when the
// slot is taken.
1596 // Now let's batch this entry.
1597 T_dyninstRPC::batch_buffer_entry &theEntry = theBatchBuffer[batch_buffer_next];
1599 theEntry.startTimeStamp = startTimeStamp;
1600 theEntry.endTimeStamp = endTimeStamp;
1601 theEntry.value = value;
1602 theEntry.weight = val_weight;
1603 theEntry.internal_met = internal_metric;
1604 batch_buffer_next++;
1607 //////////////////////////////////////////////////////////////////////////////
1608 // Buffer the traces before we actually send it //
1609 // Send it when the buffers are full //
1610 // or, send it when the last sample in the interval has arrived. //
1611 //////////////////////////////////////////////////////////////////////////////
// Capacity of the trace batch, in entries.
1613 const unsigned TRACE_BUFFER_SIZE = 10;
1614 bool TRACE_BURST_HAS_COMPLETED = false;
1615 // set to true after a burst (after a processTraceStream(), or sampleNodes for
1616 // the CM5), which will force the buffer to be flushed before it fills up
1617 // (if not, we'd have bad response time)
// Fixed-size staging area for trace records awaiting transmission.
1619 vector<T_dyninstRPC::trace_batch_buffer_entry> theTraceBatchBuffer (TRACE_BUFFER_SIZE);
// Index of the next free slot in theTraceBatchBuffer (equivalently, the number of
// buffered entries).
1620 unsigned int trace_batch_buffer_next=0;
// Sends the buffered trace records in a single batchTraceDataCallbackFunc() call and
// resets the buffer.
//   program -- forwarded as the first argument of the igen call
// The entries are first copied into a vector sized exactly to the number buffered.
// Afterwards TRACE_BURST_HAS_COMPLETED is cleared and trace_batch_buffer_next is 0.
1622 void flush_trace_batch_buffer(int program) {
1623 // don't need to flush if the batch had no data (this does happen; see
1625 if (trace_batch_buffer_next == 0)
1628 vector<T_dyninstRPC::trace_batch_buffer_entry> copyTraceBatchBuffer(trace_batch_buffer_next);
1629 for (unsigned i=0; i< trace_batch_buffer_next; i++)
1630 copyTraceBatchBuffer[i] = theTraceBatchBuffer[i];
1633 // Now let's do the actual igen call!
1635 tp->batchTraceDataCallbackFunc(program, copyTraceBatchBuffer);
1637 TRACE_BURST_HAS_COMPLETED = false;
1638 trace_batch_buffer_next = 0;
// Queues one trace record in theTraceBatchBuffer and flushes afterwards when the
// buffer has become full or TRACE_BURST_HAS_COMPLETED is set.
//   program      -- forwarded to flush_trace_batch_buffer()
//   mid          -- metric id of the record
//   recordLength -- stored in the entry's length field and used to size the byteArray
// Unlike batchSampleData(), the entry is stored first and the flush check follows.
// NOTE(review): the remaining parameter (presumably `recordPtr`, used below) and any
// store of `mid` into the entry are not visible here.
1641 void batchTraceData(int program, int mid, int recordLength,
1644 // Now let's batch this entry.
1645 T_dyninstRPC::trace_batch_buffer_entry &theEntry = theTraceBatchBuffer[trace_batch_buffer_next];
1647 theEntry.length = recordLength;
1648 theEntry.traceRecord = byteArray(recordPtr,recordLength);
1649 trace_batch_buffer_next++;
1651 // We buffer things up and eventually call tp->batchTraceDataCallbackFunc
1653 // Flush the buffer if (1) it is full, or (2) for good response time, after
1655 if (trace_batch_buffer_next >= TRACE_BUFFER_SIZE || TRACE_BURST_HAS_COMPLETED) {
1656 flush_trace_batch_buffer(program);
// Queues a sample for this metric instance (id_) directly through batchSampleData().
//   start, end    -- sample interval; asserted to satisfy end > start and to be no
//                    earlier than firstRecordTime/MILLION (start gets a 1e-6 tolerance)
//   value, weight -- forwarded unchanged
// NOTE(review): the last parameter (presumably `internal_met`, forwarded below) is
// declared on a line not visible here.
1661 void metricDefinitionNode::forwardSimpleValue(timeStamp start, timeStamp end,
1662 sampleValue value, unsigned weight,
1666 assert(start + 0.000001 >= (firstRecordTime/MILLION));
1667 assert(end >= (firstRecordTime/MILLION));
1668 assert(end > start);
1670 batchSampleData(id_, start, end, value, weight, internal_met);
// Integer overload: records a new cumulative counter reading for this component.
//   wallTime             -- timestamp of the reading; divided by 1e6 to get sampleTime
//   new_cumulative_value -- running total; asserted not to be below the stored total
// The delta from the previously stored total is handed to every entry of `samples`,
// and the matching entry of `aggregators` is then asked to re-aggregate.
1673 void metricDefinitionNode::updateValue(time64 wallTime, int new_cumulative_value) {
1674 // This is an alternative to updateValue(time64, sampleValue); this
1675 // integer-only version should be faster (fewer int-->float conversions)
1676 // and possibly more accurate (since int-->float conversions lose precision)
1678 const timeStamp sampleTime = wallTime / 1000000.0;
1679 // yuck; fp division is expensive!!!
1681 // report only the delta from the last sample
1682 assert(new_cumulative_value >= cumulativeValue_float);
1684 // note: right now, cumulativeValue is a float; we should change it to a union
1685 // of {float, int, long, long long, double}
1686 const float delta_value = new_cumulative_value - cumulativeValue_float;
1687 // note: change delta_value to "int" once we get cumulativeValue_int implemented;
1689 // updating cumulativeValue_float is much easier than the float version of
1691 cumulativeValue_float = new_cumulative_value;
// samples[i] and aggregators[i] are parallel entries.
1693 assert(samples.size() == aggregators.size());
1694 for (unsigned lcv=0; lcv < samples.size(); lcv++) {
1695 // call sampleInfo::newValue(). sampleInfo is in the util lib
1696 // (aggregateSample.h/.C)
1697 if (samples[lcv]->firstValueReceived()) {
1698 samples[lcv]->newValue(sampleTime, delta_value);
1700 samples[lcv]->firstTimeAndValue(sampleTime, delta_value);
1701 // formerly startTime()
1704 // call sampleInfo::updateAggregateComponent()
1705 aggregators[lcv]->updateAggregateComponent(); // metricDefinitionNode::updateAggregateComponent()
// Floating-point overload: records a new reading for this component.
//   wallTime -- timestamp of the reading; divided by 1e6 to get sampleTime
//   value    -- the reading (declared on a line not visible here); asserted >= -0.01
// For style_ == EventCounter the reading is converted to a delta from
// cumulativeValue_float, with a small tolerance for readings that appear to go
// backwards. The (possibly adjusted) value is handed to every entry of `samples`,
// and the matching entry of `aggregators` is then asked to re-aggregate.
1709 void metricDefinitionNode::updateValue(time64 wallTime,
1712 timeStamp sampleTime = wallTime / 1000000.0;
1713 // note: we can probably do integer division by million quicker
1715 assert(value >= -0.01);
1717 // TODO -- is this ok?
1718 // TODO -- do sampledFuncs work ?
1719 if (style_ == EventCounter) {
1721 // only use delta from last sample.
1722 if (value < cumulativeValue_float) {
1723 if ((value/cumulativeValue_float) < 0.99999) {
1724 assert((value + 0.0001) >= cumulativeValue_float);
1726 // floating point rounding error ignore
1727 cumulativeValue_float = value;
1731 // if (value + 0.0001 < cumulativeValue_float)
1732 // printf ("WARNING: sample went backwards!!!!!\n");
// Turn the cumulative reading into a delta, then advance the stored total by it.
1733 value -= cumulativeValue_float;
1734 cumulativeValue_float += value;
1738 // If style==EventCounter then value is changed. Otherwise, it keeps the
1739 // the current "value" (e.g. SampledFunction case). That's why it is not
1740 // necessary to have an special case for SampledFunction.
// samples[i] and aggregators[i] are parallel entries.
1743 assert(samples.size() == aggregators.size());
1744 for (unsigned u = 0; u < samples.size(); u++) {
1745 if (samples[u]->firstValueReceived())
1746 samples[u]->newValue(sampleTime, value);
1748 samples[u]->firstTimeAndValue(sampleTime, value);
1749 //samples[u]->startTime(sampleTime);
1751 aggregators[u]->updateAggregateComponent();
// Asks aggSample for an aggregated interval and queues it through batchSampleData()
// under this node's id_, using aggSample.numComponents() as the weight and `false`
// for the internal-metric flag.
// The interval is asserted to satisfy end > start and to be no earlier than
// firstRecordTime/MILLION (start gets a 1e-6 tolerance).
// NOTE(review): any validity check on `ret` guarding the asserts is not visible here.
1755 void metricDefinitionNode::updateAggregateComponent()
1757 // currently called (only) by the above routine
1758 sampleInterval ret = aggSample.aggregateValues();
1759 // class aggregateSample is in util lib (aggregateSample.h)
1760 // warning: method aggregateValues() is complex
1763 assert(ret.end > ret.start);
1764 assert(ret.start + 0.000001 >= (firstRecordTime/MILLION));
1765 assert(ret.end >= (firstRecordTime/MILLION));
1766 batchSampleData(id_, ret.start, ret.end, ret.value,
1767 aggSample.numComponents(),false);
1772 // Costs are now reported to paradyn like other metrics (ie. we are not
1773 // calling reportInternalMetrics to deliver cost values, instead we wait
1774 // until we have received a new interval of cost data from each process)
1775 // note: this only works for the CM5 because all cost metrics are summed
1776 // at the daemons and at paradyn, otherwise the CM5 needs its own version
1777 // of this routine that uses the same aggregate method as the one for paradyn
// Compiled only when SHM_SAMPLING is not defined.
1779 #ifndef SHM_SAMPLING
// Updates the three cost metrics for `proc` from one cost record.
//   proc -- process the record came from
//   h    -- trace header; h->wall and h->process are divided by 1e6 to get times
//   s    -- cost record; s->obsCostIdeal feeds the observed-cost metrics
1780 void processCost(process *proc, traceHeader *h, costUpdate *s)
1782 // we can probably do integer division by million quicker.
1783 timeStamp newSampleTime = (h->wall / 1000000.0);
1784 timeStamp newProcessTime = (h->process / 1000000.0);
1786 timeStamp lastProcessTime =
1787 totalPredictedCost->getLastSampleProcessTime(proc);
1789 // find the portion of uninstrumented time for this interval
1790 double unInstTime = ((newProcessTime - lastProcessTime)
1791 / (1+currentPredictedCost));
1792 // update predicted cost
1793 // note: currentPredictedCost is the same for all processes
1794 // this should be changed to be computed on a per process basis
1795 sampleValue newPredCost = totalPredictedCost->getCumulativeValue(proc);
1796 newPredCost += (float)(currentPredictedCost*unInstTime);
1797 totalPredictedCost->updateValue(proc,newPredCost,
1798 newSampleTime,newProcessTime);
1799 // update observed cost
1800 observed_cost->updateValue(proc,s->obsCostIdeal,
1801 newSampleTime,newProcessTime);
1803 // update smooth observed cost
1804 smooth_obs_cost->updateSmoothValue(proc,s->obsCostIdeal,
1805 newSampleTime,newProcessTime);
// Compiled only when SHM_SAMPLING is not defined.
1809 #ifndef SHM_SAMPLING
// Routes one TR_SAMPLE record to the metricDefinitionNode registered for its id.
//   h -- trace header; h->wall is the sample's wall-clock time
//   s -- sample; s->id.id selects the node through midToMiMap, s->value is the reading
// Samples whose id is not in midToMiMap are logged to metric_cerr as discarded.
// NOTE(review): the early return after that log line is not visible here.
1810 void processSample(int /* pid */, traceHeader *h, traceSample *s)
1812 // called from processTraceStream (perfStream.C) when a TR_SAMPLE record
1813 // has arrived from the appl.
1815 unsigned mid = s->id.id; // low-level counterId (see primitives.C)
1817 static time64 firstWall = 0;
1819 static bool firstTime = true;
1822 firstWall = h->wall;
1825 metricDefinitionNode *mi; // filled in by find() if found
1826 if (!midToMiMap.find(mid, mi)) { // low-level counterId to metricDefinitionNode
1827 metric_cerr << "TR_SAMPLE id " << s->id.id << " not for valid mi...discarding" << endl;
1831 // metric_cerr << "FROM pid " << pid << " got value " << s->value << " for id " << s->id.id << endl;
1833 // sprintf(errorLine, "sample id %d at time %8.6f = %f\n", s->id.id,
1834 // ((double) *(int*) &h->wall) + (*(((int*) &h->wall)+1))/1000000.0, s->value);
1835 // logLine(errorLine);
1836 mi->updateValue(h->wall, s->value);
1842 * functions to operate on inst request graph.
// Constructs an instrumentation request for iPoint.
// Visible initialisation: `instance` starts as NULL, `ast` is taken from iAst through
// assignAst(), and `manuallyTrigger` records iManuallyTrigger.
// NOTE(review): the middle parameters and the remaining member initialisations are
// on lines not visible here.
1845 instReqNode::instReqNode(instPoint *iPoint,
1848 callOrder o, bool iManuallyTrigger) {
1852 instance = NULL; // set when insertInstrumentation() calls addInstFunc()
1853 ast = assignAst(iAst);
1854 manuallyTrigger = iManuallyTrigger;
// Builds the child-process copy of parentNode after a fork.
// The copy reuses the parent's point, ast and `when`, never triggers manually, and has
// its `instance` looked up in `map` (parent instInstance* -> child instInstance*).
// NOTE(review): the handling of a failed map lookup and the return statement are not
// visible here; presumably `ret` is returned.
1858 instReqNode instReqNode::forkProcess(const instReqNode &parentNode,
1859 const dictionary_hash<instInstance*,instInstance*> &map) {
1860 instReqNode ret = instReqNode(parentNode.point, parentNode.ast, parentNode.when,
1862 false // don't manually trigger
1865 if (!map.find(parentNode.instance, ret.instance)) // writes to ret.instance
// Deletes the child-process copy of this (parent-side) request's instrumentation.
//   map -- parent instInstance* -> child instInstance*; on success the entry for this
//          request's instance is overwritten with NULL
// Returns true on success.
// NOTE(review): the result when the instance is absent from `map` is not visible
// here; presumably false.
1871 bool instReqNode::unFork(dictionary_hash<instInstance*,instInstance*> &map) const {
1872 // The fork syscall duplicates all trampolines from the parent into the child. For
1873 // those mi's which we don't want to propagate to the child, this creates a
1874 // problem. We need to remove instrumentation code from the child. This routine
1877 // "this" represents an instReqNode in the PARENT process.
1878 // "map" maps all instInstance*'s of the parent process to instInstance*'s in the
1879 // child process. We modify "map" by setting a value to NULL.
1881 instInstance *parentInstance = getInstance();
1883 instInstance *childInstance;
1884 if (!map.find(parentInstance, childInstance)) // writes to childInstance
1887 vector<unsigned> pointsToCheck; // is it right leaving this empty on a fork()???
1888 deleteInst(childInstance, pointsToCheck);
1890 map[parentInstance] = NULL; // since we've deleted...
1892 return true; // success
// Installs this request in theProc by calling addInstFunc() and stores the
// result in `instance`. Returns true when addInstFunc() returned non-NULL.
// NOTE(review): how retInstance is filled in is not visible in this excerpt.
1895 bool instReqNode::insertInstrumentation(process *theProc,
1896 returnInstance *&retInstance)
1898 // NEW: We may manually trigger the instrumentation, via a call to postRPCtoDo()
1900 // addInstFunc() is one of the key routines in all paradynd.
1901 // It installs a base tramp at the point (if needed), generates code
1902 // for the tramp, calls inferiorMalloc() in the text heap to get space for it,
1903 // and actually inserts the instrumentation.
1904 instance = addInstFunc(theProc, point, ast, when, order,
1905 false, // false --> don't exclude cost
1908 return (instance != NULL);
// Removes this node's instrumentation by handing `instance` and
// pointsToCheck to deleteInst().
1911 void instReqNode::disable(const vector<unsigned> &pointsToCheck)
1913 deleteInst(instance, pointsToCheck);
// Destructor for instReqNode.
// NOTE(review): the body is not visible in this excerpt.
1917 instReqNode::~instReqNode()
// Estimates the cost of this request for theProc.
// The per-execution cost in cycles is the sum of the ast's cost, the cost of
// the instrumentation point, and the instruction cost of the tramp preamble
// and trailer. It is divided by cyclesPerSecond and then multiplied by the
// point's frequency (getPointFrequency) to give `value`.
// NOTE(review): the return statement is not visible here; presumably `value`
// is what gets returned -- confirm.
1923 float instReqNode::cost(process *theProc) const
1928 int unitCostInCycles;
1930 unitCostInCycles = ast->cost() + getPointCost(theProc, point) +
1931 getInsnCost(trampPreamble) + getInsnCost(trampTrailer);
1932 // printf("unit cost = %d cycles\n", unitCostInCycles);
1933 unitCost = unitCostInCycles/ cyclesPerSecond;
1934 frequency = getPointFrequency(point);
1935 value = unitCost * frequency;
// Queues this node's ast for execution in theProc via postRPCtoDo().
// Only legal on nodes created with manual triggering enabled (asserted).
// NOTE(review): the use of `mid` and the return value are not visible in
// this excerpt.
1939 bool instReqNode::triggerNow(process *theProc, int mid) {
1940 assert(manuallyTrigger);
1942 theProc->postRPCtoDo(ast, false, // don't skip cost
1943 NULL, // no callback fn needed
1946 // the rpc will be launched with a call to launchRPCifAppropriate()
1947 // in the main loop (perfStream.C)
1952 /* ************************************************************************* */
1954 #ifndef SHM_SAMPLING
// Records the sample id and initial value for a sampled int counter.
// Unless computingCost is set, the counter is installed right away in iMi's
// process via insertInstrumentation(), and success is asserted.
1955 sampledIntCounterReqNode::sampledIntCounterReqNode(int iValue, int iCounterId,
1956 metricDefinitionNode *iMi,
1957 bool computingCost) :
1959 theSampleId = iCounterId;
1960 initialValue = iValue;
1962 // The following fields are NULL until insertInstrumentation()
1966 if (!computingCost) {
1968 isOk = insertInstrumentation(iMi->proc(), iMi);
1969 assert(isOk && counterPtr!=NULL);
// Fork constructor: builds the child-process counterpart of `src`.
// The counter address is reused unchanged, the sampler instance is
// translated through `map`, the new sample id is taken from iCounterId, and
// an intCounter carrying that id is written into the child's heap.
1973 sampledIntCounterReqNode::sampledIntCounterReqNode(const sampledIntCounterReqNode &src,
1975 metricDefinitionNode *,
1977 const dictionary_hash<instInstance*,instInstance*> &map) {
1978 // a dup() routine (call after a fork())
1979 counterPtr = src.counterPtr; // assumes addr spaces have been dup()d.
// NOTE(review): the failure branch of this lookup is not visible in this excerpt.
1981 if (!map.find(src.sampler, this->sampler)) // writes to this->sampler
1984 theSampleId = iCounterId;
1987 temp.id.id = this->theSampleId;
1988 temp.value = initialValue;
1989 writeToInferiorHeap(childProc, temp);
// Heap-allocates the child-process copy of this node using the fork
// constructor; intended to be called after a fork().
1993 sampledIntCounterReqNode::dup(process *childProc,
1994 metricDefinitionNode *mi,
1996 const dictionary_hash<instInstance*,instInstance*> &map
1998 // duplicate 'this' (allocate w/ new) and return. Call after a fork().
2000 sampledIntCounterReqNode *tmp;
2001 tmp = new sampledIntCounterReqNode(*this, childProc, mi, iCounterId, map);
// Allocates an intCounter in theProc's data heap, initializes it with this
// node's sample id and initial value, and instruments the entry of
// DYNINSTsampleValues with a call to DYNINSTreportCounter on that counter.
// Returns false if the heap allocation fails, true otherwise.
2007 bool sampledIntCounterReqNode::insertInstrumentation(process *theProc,
2008 metricDefinitionNode *,
2010 // Remember counterPtr and sampler are NULL until this routine
2012 counterPtr = (intCounter*)inferiorMalloc(theProc, sizeof(intCounter), dataHeap);
2013 if (counterPtr == NULL)
2014 return false; // failure!
2016 // initialize the intCounter in the inferior heap
2018 temp.id.id = this->theSampleId;
2019 temp.value = this->initialValue;
2021 writeToInferiorHeap(theProc, temp);
// Look the sampling function up under both spellings (with and without a
// leading underscore).
2023 function_base *sampleFunction =
2024 theProc->findOneFunction("DYNINSTsampleValues");
2025 if (!sampleFunction)
2026 sampleFunction = theProc->findOneFunction("_DYNINSTsampleValues");
2027 assert(sampleFunction);
// Build the call DYNINSTreportCounter(counterPtr) to insert at the entry point.
2030 tmp = new AstNode(AstNode::Constant, counterPtr);
2031 ast = new AstNode("DYNINSTreportCounter", tmp);
2034 instPoint *func_entry = (instPoint *)sampleFunction->funcEntry(theProc);
2035 sampler = addInstFunc(theProc, func_entry,
2036 ast, callPreInsn, orderLastAtPoint, false);
2039 return true; // success
// Tears down this counter in theProc: removes the sampler instrumentation
// (when present) and frees the intCounter from the data heap.
// pointsToCheck is passed on to inferiorFree().
2042 void sampledIntCounterReqNode::disable(process *theProc,
2043 const vector<unsigVecType> &pointsToCheck) {
2044 // We used to remove the sample id from midToMiMap here but now the caller is
2045 // responsible for that.
2047 // Remove instrumentation added to DYNINSTsampleValues(), if necessary:
2048 if (sampler != NULL)
2049 ::deleteInst(sampler, getAllTrampsAtPoint(sampler));
2051 // Deallocate space for intCounter in the inferior heap:
2052 assert(counterPtr != NULL);
2053 inferiorFree(theProc, (unsigned)counterPtr, dataHeap, pointsToCheck);
// Copies sizeof(intCounter) bytes from dataSrc to address counterPtr in
// theProc using writeDataSpace().
2056 void sampledIntCounterReqNode::writeToInferiorHeap(process *theProc,
2057 const intCounter &dataSrc) const {
2058 // using the contents of "dataSrc", write to the inferior heap at loc
2059 // "counterPtr" via proc->writeDataSpace()
2061 theProc->writeDataSpace(counterPtr, sizeof(intCounter), &dataSrc);
// Deletes the child-process copy of this node's sampler instrumentation
// after a fork and sets the corresponding `map` entry to NULL.
// NOTE(review): the lookup-failure branch and the return statements are not
// visible in this excerpt.
2064 bool sampledIntCounterReqNode::
2065 unFork(dictionary_hash<instInstance*,instInstance*> &map) {
2066 instInstance *parentSamplerInstance = this->sampler;
2068 instInstance *childSamplerInstance;
2069 if (!map.find(parentSamplerInstance, childSamplerInstance))
2072 vector<unsigned> pointsToCheck; // empty on purpose
2073 deleteInst(childSamplerInstance, pointsToCheck);
2075 map[parentSamplerInstance] = NULL;
2082 /* ************************************************************************* */
// Records the sample id and initial value for a shared-memory int counter.
// allocatedIndex/allocatedLevel start at UINT_MAX, which getInferiorPtr()
// treats as "not allocated". Unless computingCost is set, the counter is
// allocated right away through insertInstrumentation().
2086 sampledShmIntCounterReqNode::sampledShmIntCounterReqNode(int iValue,
2088 metricDefinitionNode *iMi,
2092 theSampleId = iCounterId;
2093 initialValue = iValue;
2095 // The following fields are NULL until insertInstrumentation()
2096 allocatedIndex = UINT_MAX;
2097 allocatedLevel = UINT_MAX;
2101 if (!computingCost) {
2103 isOk = insertInstrumentation(iMi->proc(), iMi, doNotSample);
// Fork constructor: builds the child-process counterpart of `src`.
// The table index/level and initial value are copied from src, the sample id
// comes from iCounterId, each child thread's local counter is rewritten with
// the new id and initial value, and a new housekeeping entry is registered
// with the child's superTable.
2108 sampledShmIntCounterReqNode::
2109 sampledShmIntCounterReqNode(const sampledShmIntCounterReqNode &src,
2110 process *childProc, metricDefinitionNode *mi,
2111 int iCounterId, const process *parentProc) {
2112 // a dup() routine (call after a fork())
2113 // Assumes that "childProc" has been copied already (e.g., the shm seg was copied).
2115 // Note that the index w/in the inferior heap remains the same, so setting the
2116 // new inferiorCounterPtr isn't too hard. Actually, it's trivial, since other code
2117 // ensures that the new shm segment is placed in exactly the same virtual mem loc
2118 // as the previous one.
2120 // Note that the fastInferiorHeap class's fork ctor will have already copied the
2121 // actual data; we need to fill in new meta-data (new houseKeeping entries).
2123 this->allocatedIndex = src.allocatedIndex;
2124 this->allocatedLevel = src.allocatedLevel;
2126 this->theSampleId = iCounterId; // this is different from the parent's value
2127 this->initialValue = src.initialValue;
2129 superTable &theTable = childProc->getTable();
2131 // since the new shm seg is placed in exactly the same memory location as
2132 // the old one, nothing here should change.
2133 const superTable &theParentTable = parentProc->getTable();
// NOTE(review): the leading 0 passed to index2InferiorAddr/index2LocalAddr
// presumably selects the int-counter table (the wall- and process-timer
// code in this file passes 1 and 2) -- confirm against superTable.
2134 assert(theTable.index2InferiorAddr(0,childProc->threads[0]->get_pd_pos(),allocatedIndex,allocatedLevel)==theParentTable.index2InferiorAddr(0,parentProc->threads[0]->get_pd_pos(),allocatedIndex,allocatedLevel));
2136 for (unsigned i=0; i<childProc->threads.size(); i++) {
2137 // write to the raw item in the inferior heap:
2138 intCounter *localCounterPtr = (intCounter *) theTable.index2LocalAddr(0,childProc->threads[i]->get_pd_pos(),allocatedIndex,allocatedLevel);
2139 const intCounter *localSrcCounterPtr = (const intCounter *) childProc->getParent()->getTable().index2LocalAddr(0,childProc->threads[i]->get_pd_pos(),allocatedIndex,allocatedLevel);
2140 localCounterPtr->value = initialValue;
2141 localCounterPtr->id.id = theSampleId;
2142 localCounterPtr->theSpinner = localSrcCounterPtr->theSpinner;
2143 // in case we're in the middle of an operation
2146 // write HK for this intCounter:
2147 // Note: we don't assert anything about mi->getMId(), because that id has no
2148 // relation to the ids we work with (theSampleId). In fact, we (the sampling code)
2149 // just don't ever care what mi->getMId() is.
// The caller must already have registered theSampleId -> mi in midToMiMap.
2150 assert(theSampleId >= 0);
2151 assert(midToMiMap.defines(theSampleId));
2152 assert(midToMiMap[theSampleId] == mi);
2153 intCounterHK iHKValue(theSampleId, mi);
2154 // the mi differs from the mi of the parent; theSampleId differs too.
2155 theTable.initializeHKAfterForkIntCounter(allocatedIndex, allocatedLevel, iHKValue);
// Heap-allocates the child-process copy of this node using the fork
// constructor, passing childProc->getParent() as the parent process.
// The instInstance map parameter is unnamed and unused here.
2161 sampledShmIntCounterReqNode::dup(process *childProc,
2162 metricDefinitionNode *mi,
2164 const dictionary_hash<instInstance*,instInstance*> &
2166 // duplicate 'this' (allocate w/ new) and return. Call after a fork().
2168 sampledShmIntCounterReqNode *tmp;
2169 tmp = new sampledShmIntCounterReqNode(*this, childProc, mi, iCounterId, childProc->getParent());
// Allocates this int counter in theProc's superTable, initialized with this
// node's sample id and initial value plus a housekeeping entry tying the id
// to iMi. On success allocatedIndex/allocatedLevel are filled in.
// Returns false if allocIntCounter() fails, true otherwise.
2175 bool sampledShmIntCounterReqNode::insertInstrumentation(process *theProc,
2176 metricDefinitionNode *iMi, bool doNotSample) {
2177 // Remember counterPtr is NULL until this routine gets called.
2178 // WARNING: there will be an assert failure if the applic hasn't yet attached to the
2181 // initialize the intCounter in the inferior heap
2183 iValue.id.id = this->theSampleId;
2184 iValue.value = this->initialValue; // what about initializing 'theSpinner'???
2186 intCounterHK iHKValue(this->theSampleId, iMi);
2188 superTable &theTable = theProc->getTable();
2190 if (!theTable.allocIntCounter(iValue, iHKValue, this->allocatedIndex, this->allocatedLevel, doNotSample))
2191 return false; // failure
2193 return true; // success
// Releases this counter's slot in theProc's superTable. The per-point tramp
// lists in pointsToCheck are flattened into a single vector that is handed
// to makePendingFree() together with the slot's index and level.
2196 void sampledShmIntCounterReqNode::disable(process *theProc,
2197 const vector<unsigVecType> &pointsToCheck) {
2198 // We used to remove the sample id from midToMiMap here but now the caller is
2199 // responsible for that.
2201 superTable &theTable = theProc->getTable();
2203 // Remove from inferior heap; make sure we won't be sampled any more:
2204 vector<unsigned> trampsMaybeUsing;
2205 for (unsigned pointlcv=0; pointlcv < pointsToCheck.size(); pointlcv++)
2206 for (unsigned tramplcv=0; tramplcv < pointsToCheck[pointlcv].size(); tramplcv++)
2207 trampsMaybeUsing += pointsToCheck[pointlcv][tramplcv];
2209 theTable.makePendingFree(0,allocatedIndex,allocatedLevel,trampsMaybeUsing);
2211 #if defined(MT_THREAD)
2212 //NOTE: Not yet implemented for shm sampling! naim 4/23/97
2213 // pdThread *thr = theProc->threads[0];
2214 // thr->CTvector->remove(this->theSampleId, this->position_);
2215 // theProc->updateActiveCT(false,counter);
2221 /* ************************************************************************* */
// Records the sample id and initial value for a non-sampled int counter.
// Unless computingCost is set, the counter is allocated right away in iMi's
// process via insertInstrumentation(), and success is asserted.
2223 nonSampledIntCounterReqNode::nonSampledIntCounterReqNode(int iValue,
2225 metricDefinitionNode *iMi,
2226 bool computingCost) :
2228 theSampleId = iCounterId;
2229 initialValue = iValue;
2231 // The following fields are NULL until insertInstrumentation()
2234 if (!computingCost) {
2236 isOk = insertInstrumentation(iMi->proc(), iMi);
2237 assert(isOk && counterPtr!=NULL);
// Fork constructor: builds the child-process counterpart of `src`.
// The counter address and initial value are copied from src, the sample id
// comes from iCounterId, and an intCounter holding that id and the initial
// value is written into the child's heap.
2241 nonSampledIntCounterReqNode::
2242 nonSampledIntCounterReqNode(const nonSampledIntCounterReqNode &src,
2243 process *childProc, metricDefinitionNode *,
2245 // a dup() routine (call after a fork())
2246 counterPtr = src.counterPtr; // assumes addr spaces have been dup()d.
2247 initialValue = src.initialValue;
2248 theSampleId = iCounterId;
2251 temp.id.id = this->theSampleId;
2252 temp.value = this->initialValue;
2253 writeToInferiorHeap(childProc, temp);
// Heap-allocates the child-process copy of this node using the fork
// constructor. The instInstance map parameter is unnamed and unused here.
2257 nonSampledIntCounterReqNode::dup(process *childProc,
2258 metricDefinitionNode *mi,
2260 const dictionary_hash<instInstance*,instInstance*> &
2262 // duplicate 'this' (allocate w/ new) and return. Call after a fork().
2264 nonSampledIntCounterReqNode *tmp;
2265 tmp = new nonSampledIntCounterReqNode(*this, childProc, mi, iCounterId);
// Allocates an intCounter in theProc's data heap and initializes it with
// this node's sample id and initial value. No sampling instrumentation is
// added. Returns false if the allocation fails, true otherwise.
2271 bool nonSampledIntCounterReqNode::insertInstrumentation(process *theProc,
2272 metricDefinitionNode *,
2274 // Remember counterPtr is NULL until this routine gets called.
2275 counterPtr = (intCounter*)inferiorMalloc(theProc, sizeof(intCounter), dataHeap);
2276 if (counterPtr == NULL)
2277 return false; // failure!
2279 // initialize the intCounter in the inferior heap
2281 temp.id.id = this->theSampleId;
2282 temp.value = this->initialValue;
2284 writeToInferiorHeap(theProc, temp);
2286 return true; // success
// Frees this counter's intCounter from theProc's data heap; pointsToCheck
// is passed on to inferiorFree(). The counter must have been allocated.
2289 void nonSampledIntCounterReqNode::disable(process *theProc,
2290 const vector<unsigVecType> &pointsToCheck) {
2291 // We used to remove the sample id from midToMiMap here but now the caller is
2292 // responsible for that.
2294 // Deallocate space for intCounter in the inferior heap:
2295 assert(counterPtr != NULL);
2296 inferiorFree(theProc, (unsigned)counterPtr, dataHeap, pointsToCheck);
// Copies sizeof(intCounter) bytes from dataSrc to address counterPtr in
// theProc using writeDataSpace().
2299 void nonSampledIntCounterReqNode::writeToInferiorHeap(process *theProc,
2300 const intCounter &dataSrc) const {
2301 // using the contents of "dataSrc", write to the inferior heap at loc
2302 // "counterPtr" via proc->writeDataSpace()
2304 theProc->writeDataSpace(counterPtr, sizeof(intCounter), &dataSrc);
2307 /* ****************************************************************** */
2309 #ifndef SHM_SAMPLING
// Records the sample id and timer type for a sampled timer.
// Unless computingCost is set, the timer is installed right away in iMi's
// process via insertInstrumentation(), and success is asserted.
2310 sampledTimerReqNode::sampledTimerReqNode(timerType iType, int iCounterId,
2311 metricDefinitionNode *iMi,
2312 bool computingCost) :
2314 theSampleId = iCounterId;
2315 theTimerType = iType;
2317 // The following fields are NULL until insertInstrumentation():
2321 if (!computingCost) {
2323 isOk = insertInstrumentation(iMi->proc(), iMi);
2324 assert(isOk && timerPtr!=NULL);
// Fork constructor: builds the child-process counterpart of `src`.
// The timer address and type are copied from src, the sampler instance is
// translated through `map`, the sample id comes from iCounterId, and a
// zeroed tTimer carrying the new id, the type and normalize=1000000 is
// written into the child's heap.
2328 sampledTimerReqNode::sampledTimerReqNode(const sampledTimerReqNode &src,
2330 metricDefinitionNode *,
2332 const dictionary_hash<instInstance*,instInstance*> &map) {
2333 // a dup()-like routine; call after a fork()
2334 timerPtr = src.timerPtr; // assumes addr spaces have been dup()'d
// NOTE(review): the failure branch of this lookup is not visible in this excerpt.
2336 if (!map.find(src.sampler, this->sampler)) // writes to this->sampler
2339 assert(sampler); // makes sense; timers are always sampled, whereas intCounters
2340 // might be just non-sampled predicates.
2342 theSampleId = iCounterId;
2343 theTimerType = src.theTimerType;
2346 P_memset(&temp, '\0', sizeof(tTimer)); /* is this needed? */
2347 temp.id.id = this->theSampleId;
2348 temp.type = this->theTimerType;
2349 temp.normalize = 1000000;
2350 writeToInferiorHeap(childProc, temp);
2352 // WARNING: shouldn't we be resetting the raw value to count=0, start=0,
2353 // total = src.initialValue ??? On the other hand, it's not that
2354 // simple -- if the timer is active in the parent, then it'll be active
2355 // in the child. So how about setting count to src.count, start=now,
// Heap-allocates the child-process copy of this node using the fork
// constructor; intended to be called after a fork().
// NOTE(review): the condition guarding the NULL return below is not visible
// in this excerpt.
2360 sampledTimerReqNode::dup(process *childProc, metricDefinitionNode *mi,
2362 const dictionary_hash<instInstance*,instInstance*> &map
2364 // duplicate 'this' (allocate w/ new) and return. Call after a fork().
2366 sampledTimerReqNode *result = new sampledTimerReqNode(*this, childProc, mi, iCounterId, map);
2369 return NULL; // on failure, return w/o incrementing counterId
// Allocates a tTimer in theProc's data heap, initializes it (zeroed, then
// sample id, timer type and normalize=1000000), and instruments the entry of
// DYNINSTsampleValues with a call to DYNINSTreportTimer on that timer.
// Returns false if the heap allocation fails, true otherwise.
2374 bool sampledTimerReqNode::insertInstrumentation(process *theProc,
2375 metricDefinitionNode *,
2377 timerPtr = (tTimer *)inferiorMalloc(theProc, sizeof(tTimer), dataHeap);
2378 if (timerPtr == NULL)
2379 return false; // failure!
2381 // Now let's initialize the newly allocated tTimer in the inferior heap:
2383 P_memset(&temp, '\0', sizeof(tTimer));
2384 temp.id.id = this->theSampleId;
2385 temp.type = this->theTimerType;
2386 temp.normalize = 1000000;
2387 writeToInferiorHeap(theProc, temp);
2389 // Now instrument DYNINSTreportTimer:
// Look the sampling function up under both spellings (with and without a
// leading underscore).
2390 function_base *sampleFunction =
2391 theProc->findOneFunction("DYNINSTsampleValues");
2392 if (!sampleFunction)
2393 sampleFunction = theProc->findOneFunction("_DYNINSTsampleValues");
2394 assert(sampleFunction);
// Build the call DYNINSTreportTimer(timerPtr) to insert at the entry point.
2397 tmp = new AstNode(AstNode::Constant, timerPtr);
2398 ast = new AstNode("DYNINSTreportTimer", tmp);
2401 instPoint *func_entry = (instPoint *)sampleFunction->funcEntry(theProc);
2402 sampler = addInstFunc(theProc, func_entry, ast,
2403 callPreInsn, orderLastAtPoint, false);
2406 return true; // successful
// Tears down this timer in theProc: removes the sampler instrumentation
// (when present) and frees the tTimer from the data heap.
// pointsToCheck is passed on to inferiorFree().
2409 void sampledTimerReqNode::disable(process *theProc,
2410 const vector<unsigVecType> &pointsToCheck) {
2411 // We used to remove the sample id from midToMiMap here but now the caller is
2412 // responsible for that.
2414 // Remove instrumentation added to DYNINSTsampleValues(), if necessary:
2415 if (sampler != NULL)
2416 ::deleteInst(sampler, getAllTrampsAtPoint(sampler));
2418 // Deallocate space for tTimer in the inferior heap:
2420 inferiorFree(theProc, (unsigned)timerPtr, dataHeap, pointsToCheck);
// Copies sizeof(tTimer) bytes from dataSrc to address timerPtr in theProc
// using writeDataSpace().
2423 void sampledTimerReqNode::writeToInferiorHeap(process *theProc,
2424 const tTimer &dataSrc) const {
2425 // using contents of "dataSrc", a local copy of the data,
2426 // write to inferior heap at loc "timerPtr" via proc->writeDataSpace()
2428 theProc->writeDataSpace(timerPtr, sizeof(tTimer), &dataSrc);
// Deletes the child-process copy of this node's sampler instrumentation
// after a fork and sets the corresponding `map` entry to NULL.
// NOTE(review): the lookup-failure branch and the return statements are not
// visible in this excerpt.
2431 bool sampledTimerReqNode::
2432 unFork(dictionary_hash<instInstance*,instInstance*> &map) {
2433 instInstance *parentSamplerInstance = sampler;
2435 instInstance *childSamplerInstance;
2436 if (!map.find(parentSamplerInstance, childSamplerInstance))
2439 vector<unsigned> pointsToCheck; // empty
2440 deleteInst(childSamplerInstance, pointsToCheck);
2442 map[parentSamplerInstance] = NULL; // since we've deleted...
2449 /* ****************************************************************** */
// Records the sample id for a shared-memory wall timer.
// allocatedIndex/allocatedLevel start at UINT_MAX until the timer is
// allocated. Unless computingCost is set, the timer is allocated right away
// through insertInstrumentation().
2452 sampledShmWallTimerReqNode::sampledShmWallTimerReqNode(int iCounterId,
2453 metricDefinitionNode *iMi,
2454 bool computingCost) :
2456 theSampleId = iCounterId;
2458 // The following fields are NULL until insertInstrumentation():
2459 allocatedIndex = UINT_MAX;
2460 allocatedLevel = UINT_MAX;
2464 if (!computingCost) {
2466 isOk = insertInstrumentation(iMi->proc(), iMi);
// Fork constructor: builds the child-process counterpart of `src`.
// The table index/level are copied from src, the sample id comes from
// iCounterId (and must differ from the parent's), each child thread's local
// timer is reset as described below, and a new housekeeping entry is
// registered with the child's superTable.
2471 sampledShmWallTimerReqNode::
2472 sampledShmWallTimerReqNode(const sampledShmWallTimerReqNode &src,
2474 metricDefinitionNode *mi,
2475 int iCounterId, const process *parentProc) {
2476 // a dup()-like routine; call after a fork().
2477 // Assumes that the "childProc" has been duplicated already
2479 // Note that the index w/in the inferior heap remains the same, so setting the new
2480 // inferiorTimerPtr isn't too hard. Actually, it's trivial, since other code
2481 // ensures that the new shm segment is placed in exactly the same virtual mem loc
2482 // as the previous one.
2484 // Note that the fastInferiorHeap class's fork ctor will have already copied the
2485 // actual data; we need to fill in new meta-data (new houseKeeping entries).
2487 allocatedIndex = src.allocatedIndex;
2488 allocatedLevel = src.allocatedLevel;
2490 theSampleId = iCounterId;
2491 assert(theSampleId != src.theSampleId);
2493 superTable &theTable = childProc->getTable();
2495 // since the new shm seg is placed in exactly the same memory location as
2496 // the old one, nothing here should change.
2497 const superTable &theParentTable = parentProc->getTable();
// NOTE(review): the leading 1 passed to index2InferiorAddr/index2LocalAddr
// presumably selects the wall-timer table (the int-counter and process-timer
// code in this file passes 0 and 2) -- confirm against superTable.
2498 assert(theTable.index2InferiorAddr(1,childProc->threads[0]->get_pd_pos(),allocatedIndex,allocatedLevel)==theParentTable.index2InferiorAddr(1,parentProc->threads[0]->get_pd_pos(),allocatedIndex,allocatedLevel));
2500 // Write new raw value in the inferior heap:
2501 // we set localTimerPtr as follows: protector1 and protector2 should be copied from
2502 // src. total should be reset to 0. start should be set to now if active else 0.
2503 // counter should be copied from the source.
2504 // NOTE: SINCE WE COPY FROM THE SOURCE, IT'S IMPORTANT THAT ON A FORK, BOTH THE
2505 // PARENT AND CHILD ARE PAUSED UNTIL WE COPY THINGS OVER. THAT THE CHILD IS
2506 // PAUSED IS NOTHING NEW; THAT THE PARENT SHOULD BE PAUSED IS NEW NEWS!
2508 for (unsigned i=0; i<childProc->threads.size(); i++) {
2509 tTimer *localTimerPtr = (tTimer *) theTable.index2LocalAddr(1,childProc->threads[i]->get_pd_pos(),allocatedIndex,allocatedLevel);
2510 const tTimer *srcTimerPtr = (const tTimer *) childProc->getParent()->getTable().index2LocalAddr(1,childProc->threads[i]->get_pd_pos(),allocatedIndex,allocatedLevel);
2512 localTimerPtr->total = 0;
2513 localTimerPtr->counter = srcTimerPtr->counter;
2514 localTimerPtr->id.id = theSampleId;
2515 localTimerPtr->protector1 = srcTimerPtr->protector1;
2516 localTimerPtr->protector2 = srcTimerPtr->protector2;
2518 if (localTimerPtr->counter == 0)
2519 // inactive timer...this is the easy case to copy
2520 localTimerPtr->start = 0; // undefined, really
2522 // active timer...don't copy the start time from the source...make it 'now'
2523 localTimerPtr->start = getCurrWallTime();
2526 // write new HK for this tTimer:
2527 // Note: we don't assert anything about mi->getMId(), because that id has no
2528 // relation to the ids we work with (theSampleId). In fact, we (the sampling code)
2529 // just don't ever care what mi->getMId() is.
// The caller must already have registered theSampleId -> mi in midToMiMap.
2530 assert(theSampleId >= 0);
2531 assert(midToMiMap.defines(theSampleId));
2532 assert(midToMiMap[theSampleId] == mi);
2533 wallTimerHK iHKValue(theSampleId, mi, 0); // is last param right?
2534 // the mi should differ from the mi of the parent; theSampleId differs too.
2535 theTable.initializeHKAfterForkWallTimer(allocatedIndex, allocatedLevel, iHKValue);
// Heap-allocates the child-process copy of this node using the fork
// constructor, passing childProc->getParent() as the parent process.
// The instInstance map parameter is unnamed and unused here.
2541 sampledShmWallTimerReqNode::dup(process *childProc,
2542 metricDefinitionNode *mi,
2544 const dictionary_hash<instInstance*,instInstance*> &
2546 // duplicate 'this' (allocate w/ new) and return. Call after a fork().
2548 sampledShmWallTimerReqNode *tmp;
2549 tmp = new sampledShmWallTimerReqNode(*this, childProc, mi, iCounterId, childProc->getParent());
// Allocates this wall timer in theProc's superTable from a zeroed tTimer
// carrying this node's sample id, plus a housekeeping entry tying the id to
// iMi. On success allocatedIndex/allocatedLevel are filled in.
// Returns false if allocWallTimer() fails. The trailing bool is unused.
2555 bool sampledShmWallTimerReqNode::insertInstrumentation(process *theProc,
2556 metricDefinitionNode *iMi, bool) {
2557 // Remember inferiorTimerPtr is NULL until this routine gets called.
2558 // WARNING: there will be an assert failure if the applic hasn't yet attached to the
2561 // initialize the tTimer in the inferior heap
2563 P_memset(&iValue, '\0', sizeof(tTimer));
2564 iValue.id.id = this->theSampleId;
2566 wallTimerHK iHKValue(this->theSampleId, iMi, 0);
2568 superTable &theTable = theProc->getTable();
2570 if (!theTable.allocWallTimer(iValue, iHKValue, this->allocatedIndex, this->allocatedLevel))
2571 return false; // failure
// Releases this wall timer's slot in theProc's superTable. The per-point
// tramp lists in pointsToCheck are flattened into a single vector that is
// handed to makePendingFree() together with the slot's index and level.
2576 void sampledShmWallTimerReqNode::disable(process *theProc,
2577 const vector<unsigVecType> &pointsToCheck) {
2578 // We used to remove the sample id from midToMiMap here but now the caller is
2579 // responsible for that.
2581 superTable &theTable = theProc->getTable();
2583 // Remove from inferior heap; make sure we won't be sampled any more:
2584 vector<unsigned> trampsMaybeUsing;
2585 for (unsigned pointlcv=0; pointlcv < pointsToCheck.size(); pointlcv++)
2586 for (unsigned tramplcv=0; tramplcv < pointsToCheck[pointlcv].size(); tramplcv++)
2587 trampsMaybeUsing += pointsToCheck[pointlcv][tramplcv];
2589 theTable.makePendingFree(1,allocatedIndex,allocatedLevel,trampsMaybeUsing);
2591 #if defined(MT_THREAD)
2592 //NOTE: Not yet implemented for shm sampling! naim 4/23/97
2593 // pdThread *thr = theProc->threads[0];
2594 // thr->CTvector->remove(this->theSampleId, this->position_);
2595 // theProc->updateActiveCT(false,wallTimer);
2599 /* ****************************************************************** */
// Records the sample id for a shared-memory process timer.
// allocatedIndex/allocatedLevel start at UINT_MAX until the timer is
// allocated. Unless computingCost is set, the timer is allocated right away
// through insertInstrumentation().
2601 sampledShmProcTimerReqNode::sampledShmProcTimerReqNode(int iCounterId,
2602 metricDefinitionNode *iMi,
2603 bool computingCost) :
2605 theSampleId = iCounterId;
2607 // The following fields are NULL until insertInstrumentation():
2608 allocatedIndex = UINT_MAX;
2609 allocatedLevel = UINT_MAX;
2613 if (!computingCost) {
2615 isOk = insertInstrumentation(iMi->proc(), iMi);
// Fork constructor: builds the child-process counterpart of `src`.
// The table index/level are copied from src, the sample id comes from
// iCounterId (and must differ from the parent's), each child thread's local
// timer is reset as described below, and a new housekeeping entry is
// registered with the child's superTable.
2620 sampledShmProcTimerReqNode::
2621 sampledShmProcTimerReqNode(const sampledShmProcTimerReqNode &src,
2623 metricDefinitionNode *mi,
2624 int iCounterId, const process *parentProc) {
2625 // a dup()-like routine; call after a fork()
2626 // Assumes that the "childProc" has been duplicated already
2628 // Note that the index w/in the inferior heap remains the same, so setting the new
2629 // inferiorTimerPtr isn't too hard. Actually, it's trivial, since other code
2630 // ensures that the new shm segment is placed in exactly the same virtual mem loc
2631 // as the previous one.
2633 // Note that the fastInferiorHeap class's fork ctor will have already copied the
2634 // actual data; we need to fill in new meta-data (new houseKeeping entries).
2636 allocatedIndex = src.allocatedIndex;
2637 allocatedLevel = src.allocatedLevel;
2638 theSampleId = iCounterId;
2639 assert(theSampleId != src.theSampleId);
2641 superTable &theTable = childProc->getTable();
2643 // since the new shm seg is placed in exactly the same memory location as
2644 // the old one, nothing here should change.
2645 const superTable &theParentTable = parentProc->getTable();
// NOTE(review): the leading 2 passed to index2InferiorAddr/index2LocalAddr
// presumably selects the process-timer table (the int-counter and wall-timer
// code in this file passes 0 and 1) -- confirm against superTable.
2646 assert(theTable.index2InferiorAddr(2,childProc->threads[0]->get_pd_pos(),allocatedIndex,allocatedLevel)==theParentTable.index2InferiorAddr(2,parentProc->threads[0]->get_pd_pos(),allocatedIndex,allocatedLevel));
2648 // Write new raw value:
2649 // we set localTimerPtr as follows: protector1 and protector2 should be copied from
2650 // src. total should be reset to 0. start should be set to now if active else 0.
2651 // counter should be copied from the source.
2652 // NOTE: SINCE WE COPY FROM THE SOURCE, IT'S IMPORTANT THAT ON A FORK, BOTH THE
2653 // PARENT AND CHILD ARE PAUSED UNTIL WE COPY THINGS OVER. THAT THE CHILD IS
2654 // PAUSED IS NOTHING NEW; THAT THE PARENT SHOULD BE PAUSED IS NEW NEWS!
2656 for (unsigned i=0; i<childProc->threads.size(); i++) {
2657 tTimer *localTimerPtr = (tTimer *) theTable.index2LocalAddr(2,childProc->threads[i]->get_pd_pos(),allocatedIndex,allocatedLevel);
2658 const tTimer *srcTimerPtr = (const tTimer *) childProc->getParent()->getTable().index2LocalAddr(2,childProc->threads[i]->get_pd_pos(),allocatedIndex,allocatedLevel);
2660 localTimerPtr->total = 0;
2661 localTimerPtr->counter = srcTimerPtr->counter;
2662 localTimerPtr->id.id = theSampleId;
2663 localTimerPtr->protector1 = srcTimerPtr->protector1;
2664 localTimerPtr->protector2 = srcTimerPtr->protector2;
2666 if (localTimerPtr->counter == 0)
2667 // inactive timer...this is the easy case to copy
2668 localTimerPtr->start = 0; // undefined, really
2670 // active timer...don't copy the start time from the source...make it 'now'
2671 localTimerPtr->start = childProc->getInferiorProcessCPUtime();
2674 // Write new HK for this tTimer:
2675 // Note: we don't assert anything about mi->getMId(), because that id has no
2676 // relation to the ids we work with (theSampleId). In fact, we (the sampling code)
2677 // just don't ever care what mi->getMId() is.
// The caller must already have registered theSampleId -> mi in midToMiMap.
2678 assert(theSampleId >= 0);
2679 assert(midToMiMap.defines(theSampleId));
2680 assert(midToMiMap[theSampleId] == mi);
2681 processTimerHK iHKValue(theSampleId, mi, 0); // is last param right?
2682 // the mi differs from the mi of the parent; theSampleId differs too.
2683 theTable.initializeHKAfterForkProcTimer(allocatedIndex, allocatedLevel, iHKValue);
// Heap-allocates the child-process copy of this node using the fork
// constructor, passing childProc->getParent() as the parent process.
// The instInstance map parameter is unnamed and unused here.
2689 sampledShmProcTimerReqNode::dup(process *childProc,
2690 metricDefinitionNode *mi,
2692 const dictionary_hash<instInstance*,instInstance*> &
2694 // duplicate 'this' (allocate w/ new) and return. Call after a fork().
2696 sampledShmProcTimerReqNode *tmp;
2697 tmp = new sampledShmProcTimerReqNode(*this, childProc, mi, iCounterId, childProc->getParent());
// Allocates this process timer in theProc's superTable from a zeroed tTimer
// carrying this node's sample id, plus a housekeeping entry tying the id to
// iMi. On success allocatedIndex/allocatedLevel are filled in.
// Returns false if allocProcTimer() fails. The trailing bool is unused.
2703 bool sampledShmProcTimerReqNode::insertInstrumentation(process *theProc,
2704 metricDefinitionNode *iMi, bool) {
2705 // Remember inferiorTimerPtr is NULL until this routine gets called.
2706 // WARNING: there will be an assert failure if the applic hasn't yet attached to the
2709 // initialize the tTimer in the inferior heap
2711 P_memset(&iValue, '\0', sizeof(tTimer));
2712 iValue.id.id = this->theSampleId;
2714 processTimerHK iHKValue(this->theSampleId, iMi, 0);
2716 superTable &theTable = theProc->getTable();
2718 if (!theTable.allocProcTimer(iValue, iHKValue, this->allocatedIndex,this->allocatedLevel))
2719 return false; // failure
// Releases this process timer's slot in theProc's superTable. The per-point
// tramp lists in pointsToCheck are flattened into a single vector that is
// handed to makePendingFree() together with the slot's index and level.
2724 void sampledShmProcTimerReqNode::disable(process *theProc,
2725 const vector<unsigVecType> &pointsToCheck) {
2726 // We used to remove the sample id from midToMiMap here but now the caller is
2727 // responsible for that.
2729 superTable &theTable = theProc->getTable();
2731 // Remove from inferior heap; make sure we won't be sampled any more:
2732 vector<unsigned> trampsMaybeUsing;
2733 for (unsigned pointlcv=0; pointlcv < pointsToCheck.size(); pointlcv++)
2734 for (unsigned tramplcv=0; tramplcv < pointsToCheck[pointlcv].size(); tramplcv++)
2735 trampsMaybeUsing += pointsToCheck[pointlcv][tramplcv];
2737 theTable.makePendingFree(2,allocatedIndex,allocatedLevel,trampsMaybeUsing);
2739 #if defined(MT_THREAD)
2740 //NOTE: Not yet implemented for shm sampling! naim 4/23/97
2741 // pdThread *thr = theProc->threads[0];
2742 // thr->CTvector->remove(this->theSampleId, this->position_);
2743 // theProc->updateActiveCT(false,procTimer);
2748 /* **************************** */
// Reports one sample for every enabled instance of every internal metric,
// covering the interval from the previous report (`start`) to `end`.
//   force - when true, skip the "has samplingRate elapsed yet?" check
// Nothing is reported while the application is paused.
// NOTE(review): several lines (early returns, the update of `end`, the
// declarations of max1..max3) are not visible in this excerpt.
2750 void reportInternalMetrics(bool force)
2752 if (isApplicationPaused())
2753 return; // we don't sample when paused (is this right?)
// `end` is static, so it carries the end of the previous interval from one
// call to the next.
2755 static timeStamp end=0.0;
2757 // see if we have a sample to establish time base.
2758 if (!firstRecordTime) {
2759 //cerr << "reportInternalMetrics: no because firstRecordTime==0" << endl;
2764 end = (timeStamp)firstRecordTime/MILLION;
2766 const timeStamp now = getCurrentTime(false);
2768 // check if it is time for a sample
2769 if (!force && now < end + samplingRate) {
2770 // cerr << "reportInternalMetrics: no because now < end + samplingRate (end=" << end << "; samplingRate=" << samplingRate << "; now=" << now << ")" << endl;
2771 // cerr << "difference is " << (end+samplingRate-now) << endl;
2775 timeStamp start = end;
2778 // TODO -- clean me up, please
// The "_all" values reported below are the maximum over all processes of
// the per-process active counter / process timer / wall timer counts.
2783 for (unsigned u1 = 0; u1 < processVec.size(); u1++) {
2784 if (processVec[u1]->numOfActCounters_is > max1)
2785 max1=processVec[u1]->numOfActCounters_is;
2786 if (processVec[u1]->numOfActProcTimers_is > max2)
2787 max2=processVec[u1]->numOfActProcTimers_is;
2788 if (processVec[u1]->numOfActWallTimers_is > max3)
2789 max3=processVec[u1]->numOfActWallTimers_is;
2791 numOfActCounters_all=max1;
2792 numOfActProcTimers_all=max2;
2793 numOfActWallTimers_all=max3;
2795 unsigned ai_size = internalMetric::allInternalMetrics.size();
2796 for (unsigned u2=0; u2<ai_size; u2++) {
2797 internalMetric *theIMetric = internalMetric::allInternalMetrics[u2];
2798 // Loop thru all enabled instances of this internal metric...
2800 for (unsigned v=0; v < theIMetric->num_enabled_instances(); v++) {
2801 internalMetric::eachInstance &theInstance = theIMetric->getEnabledInstance(v);
2802 // not "const" since bumpCumulativeValueBy() may be called
// Metrics matched by name are reported as (interval length) * (current
// quantity). Otherwise the metric's style decides: EventCounter reports the
// change since the last cumulative value, SampledFunction the raw value.
2804 sampleValue value = (sampleValue) 0;
2805 if (theIMetric->name() == "active_processes") {
2806 //value = (end - start) * activeProcesses;
2807 value = (end - start) * theInstance.getValue();
2808 } else if (theIMetric->name() == "bucket_width") {
2809 //value = (end - start)* theInstance.getValue();
2810 // I would prefer to use (end-start) * theInstance.getValue(); however,
2811 // we've had some problems getting setValue() called in time, thus
2812 // leaving us with getValues() of 0 sometimes. See longer comment in dynrpc.C --ari
2813 extern float currSamplingRate;
2814 value = (end - start) * currSamplingRate;
2815 } else if (theIMetric->name() == "number_of_cpus") {
2816 value = (end - start) * numberOfCPUs;
2817 } else if (theIMetric->name() == "numOfActCounters") {
2818 value = (end - start) * numOfActCounters_all;
2820 } else if (theIMetric->name() == "numOfActProcTimers") {
2821 value = (end - start) * numOfActProcTimers_all;
2823 } else if (theIMetric->name() == "numOfActWallTimers") {
2824 value = (end - start) * numOfActWallTimers_all;
2826 } else if (theIMetric->name() == "infHeapMemAvailable") {
2827 value = (end - start) * inferiorMemAvailable;
2829 } else if (theIMetric->style() == EventCounter) {
2830 value = theInstance.getValue();
2831 // assert((value + 0.0001) >= imp->cumulativeValue);
2832 value -= theInstance.getCumulativeValue();
2833 theInstance.bumpCumulativeValueBy(value);
2834 } else if (theIMetric->style() == SampledFunction) {
2835 value = theInstance.getValue();
2838 theInstance.report(start, end, value);
2839 // calls metricDefinitionNode->forwardSimpleValue()
2844 void disableAllInternalMetrics() {
2845 for (unsigned u=0; u < internalMetric::allInternalMetrics.size(); u++) {
2846 internalMetric *theIMetric = internalMetric::allInternalMetrics[u];
2848 // Now loop thru all the enabled instances of this internal metric...
2849 while (theIMetric->num_enabled_instances() > 0) {
2850 internalMetric::eachInstance &theInstance = theIMetric->getEnabledInstance(0);
2851 tp->endOfDataCollection(theInstance.getMId());
2852 theIMetric->disableInstance(0);
2859 unsigned sampledShmIntCounterReqNode::getInferiorPtr(process *proc) const {
2860 // counterPtr could be NULL if we are building AstNodes just to compute
2861 // the cost - naim 2/18/97
2863 // this routine will dissapear because we can't compute the address
2864 // of the counter/timer without knowing the thread id - naim 3/17/97
2866 if (allocatedIndex == UINT_MAX || allocatedLevel == UINT_MAX) return(0);
2867 assert(proc != NULL);
2868 superTable &theTable = proc->getTable();
2869 // we assume there is only one thread
2870 return((unsigned) theTable.index2InferiorAddr(0,0,allocatedIndex,allocatedLevel));
2873 unsigned sampledShmWallTimerReqNode::getInferiorPtr(process *proc) const {
2874 // counterPtr could be NULL if we are building AstNodes just to compute
2875 // the cost - naim 2/18/97
2877 // this routine will dissapear because we can't compute the address
2878 // of the counter/timer without knowing the thread id - naim 3/17/97
2880 if (allocatedIndex == UINT_MAX || allocatedLevel == UINT_MAX) return(0);
2881 assert(proc != NULL);
2882 superTable &theTable = proc->getTable();
2883 // we assume there is only one thread
2884 return((unsigned) theTable.index2InferiorAddr(1,0,allocatedIndex,allocatedLevel));
2887 unsigned sampledShmProcTimerReqNode::getInferiorPtr(process *proc) const {
2888 // counterPtr could be NULL if we are building AstNodes just to compute
2889 // the cost - naim 2/18/97
2891 // this routine will dissapear because we can't compute the address
2892 // of the counter/timer without knowing the thread id - naim 3/17/97
2894 if (allocatedIndex == UINT_MAX || allocatedLevel == UINT_MAX) return(0);
2895 assert(proc != NULL);
2896 superTable &theTable = proc->getTable();
2897 // we assume there is only one thread
2898 return((unsigned) theTable.index2InferiorAddr(2,0,allocatedIndex,allocatedLevel));