2 * Copyright (c) 1996 Barton P. Miller
4 * We provide the Paradyn Parallel Performance Tools (below
5 * described as "Paradyn") on an AS IS basis, and do not warrant its
6 * validity or performance. We reserve the right to update, modify,
7 * or discontinue this software at any time. We shall have no
8 * obligation to supply such updates or modifications or any other
9 * form of support to you.
11 * This license is for research uses. For such uses, there is no
12 * charge. We define "research use" to mean you may freely use it
13 * inside your organization for whatever purposes you see fit. But you
14 * may not re-distribute Paradyn or parts of Paradyn, in any form
15 * source or binary (including derivatives), electronic or otherwise,
16 * to any other organization or entity without our permission.
18 * (for other uses, please contact us at paradyn@cs.wisc.edu)
20 * All warranties, including without limitation, any warranty of
21 * merchantability or fitness for a particular purpose, are hereby
24 * By your use of Paradyn, you understand and agree that we (or any
25 * other person or entity with proprietary rights in Paradyn) are
26 * under no obligation to provide either maintenance services,
27 * update services, notices of latent defects, or correction of
28 * defects for Paradyn.
30 * Even if advised of the possibility of such damages, under no
31 * circumstances shall we (or any other person or entity with
32 * proprietary rights in the software licensed hereunder) be liable
33 * to you or any third party for direct, indirect, or consequential
34 * damages of any character regardless of type of action, including,
35 * without limitation, loss of profits, loss of use, loss of good
36 * will, or computer failure or malfunction. You agree to indemnify
37 * us (and any other person or entity with proprietary rights in the
38 * software licensed hereunder) for any and all liability it may
39 * incur to third parties resulting from your use of Paradyn.
42 #include "util/h/headers.h"
46 #include "rtinst/h/rtinst.h"
47 #include "rtinst/h/trace.h"
48 #include "util/h/aggregateSample.h"
57 #include "internalMetrics.h"
58 #include <strstream.h>
60 #include "perfStream.h"
64 #include "paradynd/src/mdld.h"
65 #include "util/h/Timer.h"
66 #include "showerror.h"
67 #include "costmetrics.h"
69 #include "util/h/debugOstream.h"
71 // The following variables were defined in process.C:
72 extern debug_ostream attach_cerr;
73 extern debug_ostream inferiorrpc_cerr;
74 extern debug_ostream shmsample_cerr;
75 extern debug_ostream forkexec_cerr;
76 extern debug_ostream metric_cerr;
80 extern int inferiorMemAvailable;
81 extern vector<unsigned> getAllTrampsAtPoint(instInstance *instance);
// File-local mirror of metricDefinitionNode::counterId.  The add*Counter(),
// add*Timer() and forkProcess() routines in this file copy the class-level
// counter into it right after incrementing that counter.
82 static int internalMetricCounterId = 0;
// Sample batching helpers (defined elsewhere).  endOfDataCollection() in this
// file uses both to push out the last samples of an aggregate mi.
84 void flush_batch_buffer();
85 void batchSampleData(int mid, double startTimeStamp, double endTimeStamp,
86 double value, unsigned val_weight, bool internal_metric);
// NOTE(review): the directive closing this conditional is not visible in this
// listing -- confirm its position against the full file.
88 #ifdef sparc_tmc_cmost7_3
89 extern int getNumberOfCPUs();
// Running total of predicted cost.  propagateToNewProcess() raises it when a
// newly propagated mi costs more than the recorded originalCost_.
92 double currentPredictedCost = 0.0;
94 dictionary_hash <unsigned, metricDefinitionNode*> midToMiMap(uiHash);
95 // maps low-level counter-ids to metricDefinitionNodes
// Hash function for metricDefinitionNode pointers: the pointer value is cast
// to unsigned and shifted right by two bits.
// NOTE(review): the cast to unsigned presumably assumes pointers fit in an
// unsigned int -- confirm before building for a target where they do not.
97 unsigned mdnHash(const metricDefinitionNode *&mdn) {
98 return ((unsigned)mdn) >> 2; // assume all addrs are 4-byte aligned
99 // return ((unsigned) mdn);
// Hash function for component metricDefinitionNode pointers.  Unlike
// mdnHash(), it hashes the node's full name (getFullName()) with string::hash
// rather than the pointer value.
102 unsigned componentMdnPtrHash(metricDefinitionNode * const &ptr) {
103 // maybe assert that "ptr" isn't for an aggregate mi
104 return string::hash(ptr->getFullName());
// Dictionary of metric instances keyed by an unsigned value (presumably the
// metric id -- confirm).  createMetricInstance() iterates over it looking for
// an mi whose full name equals the requested flat name.
108 dictionary_hash<unsigned, metricDefinitionNode*> allMIs(uiHash);
// Dictionary of component mi's keyed by their flat name.
109 dictionary_hash<string, metricDefinitionNode*> allMIComponents(string::hash);
// Definition of the static list of internal metrics declared in internalMetric.
110 vector<internalMetric*> internalMetric::allInternalMetrics;
112 // used to indicate the mi is no longer used.
// NOTE(review): the definition the comment above describes is not visible in
// this listing.
// Divisor applied to firstRecordTime in endOfDataCollection() (presumably a
// microseconds-to-seconds conversion -- confirm).
114 #define MILLION 1000000.0
// Fill "result" with the aggregation operator and style of the metric named
// "metric_name".  The internal metrics are searched first, then the cost
// metrics; if neither list contains the name, the request is forwarded to
// mdl_metric_data() and its result is returned.
// NOTE(review): the statements that end a successful search (presumably
// "return true;") are not visible in this listing -- confirm.
116 bool mdl_internal_metric_data(const string& metric_name, mdl_inst_data& result) {
117 unsigned size = internalMetric::allInternalMetrics.size();
// first pass: internal metrics
118 for (unsigned u=0; u<size; u++) {
119 internalMetric *theIMetric = internalMetric::allInternalMetrics[u];
120 if (theIMetric->name() == metric_name) {
121 result.aggregate = theIMetric->aggregate();
122 result.style = theIMetric->style();
// second pass: cost metrics
127 for (unsigned u2=0; u2< costMetric::allCostMetrics.size(); u2++) {
128 if (costMetric::allCostMetrics[u2]->name() == metric_name) {
129 result.aggregate = costMetric::allCostMetrics[u2]->aggregate();
130 result.style = costMetric::allCostMetrics[u2]->style();
// neither an internal nor a cost metric: defer to the MDL-defined metrics
135 return (mdl_metric_data(metric_name, result));
138 // for non-aggregate metrics
// Constructs a component mi attached to process "p".
//   met_name            -- metric name, stored in met_
//   foc                 -- focus, stored in focus_
//   component_foc       -- component focus, stored in component_focus
//   component_flat_name -- flat name, stored in flat_name_
//   agg_style           -- aggregation operator, stored in aggOp
// The node starts out not inserted and not installed, with id_ == -1 and
// zero cumulativeValue / originalCost_.
139 metricDefinitionNode::metricDefinitionNode(process *p, const string& met_name,
140 const vector< vector<string> >& foc,
141 const vector< vector<string> >& component_foc,
142 const string& component_flat_name, int agg_style)
144 aggOp(agg_style), // CM5 metrics need aggOp to be set
145 inserted_(false), installed_(false), met_(met_name),
146 focus_(foc), component_focus(component_foc),
147 flat_name_(component_flat_name),
149 cumulativeValue(0.0), samples(0),
150 id_(-1), originalCost_(0.0), proc_(p)
// Look up aggregate/style information for the metric.
// NOTE(review): the declarations of "md" and "aflag" and whatever consumes
// them are not visible in this listing.
154 aflag=mdl_internal_metric_data(met_name, md);
159 // for aggregate metrics
// Constructs an aggregate mi over the component mi's in "parts" (copied into
// components).  proc_ is NULL for an aggregate.  Each part is linked back to
// the new node: the node is appended to the part's aggregators, and a new
// component handle obtained from aggSample is appended to the part's samples,
// so the two vectors stay index-aligned.
// NOTE(review): the declaration of the last parameter (used as agg_op below)
// is not visible in this listing.
160 metricDefinitionNode::metricDefinitionNode(const string& metric_name,
161 const vector< vector<string> >& foc,
162 const string& cat_name,
163 vector<metricDefinitionNode*>& parts,
165 : aggregate_(true), aggOp(agg_op), inserted_(false), installed_(false),
166 met_(metric_name), focus_(foc),
167 flat_name_(cat_name), components(parts),
169 id_(-1), originalCost_(0.0), proc_(NULL)
171 unsigned p_size = parts.size();
172 for (unsigned u=0; u<p_size; u++) {
173 metricDefinitionNode *mi = parts[u];
174 mi->aggregators += this;
175 mi->samples += aggSample.newComponent();
179 // check for "special" metrics that are computed directly by paradynd
180 // if a cost of an internal metric is asked for, enable=false
//
// Looks "metric_name" up among the internal metrics and then the cost
// metrics.  On a match that is legal for "canon_focus", a new component-style
// metricDefinitionNode (process == NULL) is allocated with the metric's
// aggregate operator.
181 metricDefinitionNode *doInternalMetric(vector< vector<string> >& canon_focus,
182 vector< vector<string> >& component_canon_focus,
183 string& metric_name, string& flat_name,
184 bool enable, bool& matched)
186 // called by createMetricInstance, below.  The result is one of:
188 // a valid metricDefinitionNode* when successful
189 // -1 --> enable was false
190 // -2 --> not legal to instrument this focus
191 // NULL --> a more serious error (probably metric-is-unknown)
194 metricDefinitionNode *mn = 0;
196 // check to see if this is an internal metric
197 unsigned im_size = internalMetric::allInternalMetrics.size();
198 for (unsigned im_index=0; im_index<im_size; im_index++){
199 internalMetric *theIMetric = internalMetric::allInternalMetrics[im_index];
200 if (theIMetric->name() == metric_name) {
// NOTE(review): the condition guarding this return is not visible in this
// listing; by analogy with the cost-metric branch below it is presumably
// "if (!enable)" -- confirm.
203 return (metricDefinitionNode*)-1;
205 if (!theIMetric->legalToInst(canon_focus))
206 // Paradyn will handle this case and report appropriate error msg
207 return (metricDefinitionNode*)-2;
209 mn = new metricDefinitionNode(NULL, metric_name, canon_focus,
210 component_canon_focus,
211 flat_name, theIMetric->aggregate());
// register the new node with the internal metric
214 theIMetric->enableNewInstance(mn);
219 // check to see if this is a cost metric
220 for (unsigned i=0; i < costMetric::allCostMetrics.size(); i++){
221 if(costMetric::allCostMetrics[i]->name() == metric_name){
223 if (!enable) return (metricDefinitionNode*)-1;
224 costMetric *nc = costMetric::allCostMetrics[i];
225 if (!nc->legalToInst(canon_focus)) return (metricDefinitionNode*)-2;
227 mn = new metricDefinitionNode(NULL, metric_name, canon_focus,
228 component_canon_focus,
229 flat_name, nc->aggregate());
238 // No matches found among internal or cost metrics
242 // the following should probably be made a public static member fn of class metric
//
// Builds the flat name for a metric/focus pair: the result starts as
// metricName and every component string of every hierarchy in canonFocus is
// appended to it, in hierarchy order and then component order.
243 string metricAndCanonFocus2FlatName(const string &metricName,
244 const vector< vector<string> > &canonFocus) {
245 string result = metricName;
247 for (unsigned hierarchy=0; hierarchy < canonFocus.size(); hierarchy++)
248 for (unsigned component=0; component < canonFocus[hierarchy].size();
250 result += canonFocus[hierarchy][component];
255 // the following should probably be made a public static member fn of class metric
//
// Two-step conversion: resource::foc_to_strings() turns the numeric focus
// into strings, then resource::make_canonical() writes the canonical form
// into canonFocus.
// NOTE(review): the declaration of the third parameter (used as "important"
// below) and the return statements are not visible in this listing.
256 static bool focus2CanonicalFocus(const vector<unsigned> &focus,
257 vector< vector<string> > &canonFocus,
259 // takes in "focus", writes to "canonFocus". Returns true iff successful.
260 // if "important" is false, don't print error msg on failure (called by guessCost();
261 // no mi is really being created)
263 vector< vector<string> > unCanonFocus;
264 if (!resource::foc_to_strings(unCanonFocus, focus, important)) { // writes to unCanonFocus
266 cerr << "focus2CanonicalFocus failed since resource::foc_to_strings failed" << endl;
270 resource::make_canonical(unCanonFocus, canonFocus);
// Debug helper: writes "focus" to "os".  Every component of a hierarchy is
// written with a leading '/'; something is emitted between hierarchies but
// not after the last one.
// NOTE(review): the statement controlled by the final "if" (presumably the
// separator output) is not visible in this listing.
275 static void print_focus(debug_ostream &os, vector< vector<string> > &focus) {
276 for (unsigned a=0; a < focus.size(); a++) {
277 for (unsigned b=0; b < focus[a].size(); b++)
278 os << '/' << focus[a][b];
// true for every hierarchy except the last
280 if (a < focus.size()-1)
// Create (or find) the metric instance for "metric_name" on "focus".
//   metric_name -- name of the requested metric
//   focus       -- numeric focus; converted to canonical string form first
//   enable      -- true if for real; false for guessCost()
// Steps visible here:
//   1. canonicalize the focus and derive the flat name;
//   2. return an existing mi from allMIs if one has that flat name;
//   3. for MDL metrics, instrument only the processes that are ready (not
//      exited, not neonatal, and already bootstrapped) via mdl_do();
//   4. otherwise try doInternalMetric() and map its -1 / -2 sentinel results
//      back to NULL.
// NOTE(review): short lines (remaining parameter, some returns, braces) are
// not visible in this listing.
286 metricDefinitionNode *createMetricInstance(string& metric_name,
287 vector<u_int>& focus,
288 bool enable, // true if for real; false for guessCost()
291 vector< vector<string> > canonicalFocus;
292 if (!focus2CanonicalFocus(focus, canonicalFocus, enable)) {
293 if (enable) // for real, so an error msg is appropriate
294 cerr << "createMetricInstance failed because focus2CanonicalFocus failed" << endl;
299 string flat_name = metricAndCanonFocus2FlatName(metric_name, canonicalFocus);
301 // first see if it is already defined.
302 dictionary_hash_iter<unsigned, metricDefinitionNode*> mdi(allMIs);
305 * See if we can find the requested metric instance.
306 * Currently this is only used to cache structs built for cost requests
307 * which are then instantiated. This could be used as a general system
308 * to request find sub-metrics that are already defined and use them to
309 * reduce cost. This would require adding the components of an aggregate
310 * into the allMIs list since searching tends to be top down, not bottom
311 * up. This would also require adding a ref count to many of the structures
312 * so they only get deleted when we are really done with them.
317 metricDefinitionNode *mi= NULL;
319 // TODO -- a dictionary search here will be much faster
320 while (mdi.next(key, mi))
321 if (mi->getFullName() == flat_name) {
322 metric_cerr << "createMetricInstance: mi with flat_name " << flat_name << " already exists! using it" << endl;
323 return mi; // this metricDefinitionNode has already been defined
326 if (mdl_can_do(metric_name)) {
329 /* select the processes that should be instrumented. We skip process
330 that have exited, and processes that have been created but are not
331 completely initialized yet.
332 If we try to insert instrumentation in a process that is not ready
333 yet, we get a core dump.
334 A process is ready when it is not in neonatal state and the
335 isBootstrappedYet returns true.
337 vector<process*> procs;
339 for (unsigned u = 0; u < processVec.size(); u++) {
// Keep a process only when it is ready: it has not exited, is past the
// neonatal state, and has finished bootstrapping.  (The previous test used
// "== exited || == neonatal || isBootstrappedYet()", which selected exactly
// the processes the comment above says must be skipped.)
340 if (processVec[u]->status()!=exited && processVec[u]->status()!=neonatal
341 && processVec[u]->isBootstrappedYet())
342 procs += processVec[u];
345 if (procs.size() == 0) {
346 // there are no processes to instrument
350 mi = mdl_do(canonicalFocus, metric_name, flat_name, procs, false);
352 metric_cerr << "createMetricInstance failed since mdl_do failed" << endl;
353 metric_cerr << "metric name was " << metric_name << "; focus was ";
354 print_focus(metric_cerr, canonicalFocus);
358 mi=doInternalMetric(canonicalFocus,
359 canonicalFocus, // is this right for component_canon_focus???
360 metric_name,flat_name,enable,matched);
361 // NULL on serious error; -1 if enable was false; -2 if illegal to instr with
362 // given focus [many internal metrics work only for whole program]
364 if (mi == (metricDefinitionNode*)-2) {
365 metric_cerr << "createMetricInstance: internal metric " << metric_name << " isn't defined for focus: ";
366 print_focus(metric_cerr, canonicalFocus);
367 mi = NULL; // straighten up the return value
369 else if (mi == (metricDefinitionNode*)-1) {
370 assert(!enable); // no error msg needed
371 mi = NULL; // straighten up the return value
373 else if (mi == NULL) {
374 // more serious error...do a printout
375 metric_cerr << "createMetricInstance failed since doInternalMetric failed" << endl;
376 metric_cerr << "metric name was " << metric_name << "; focus was ";
377 print_focus(metric_cerr, canonicalFocus);
387 // propagate this metric instance to process p.
388 // p is a process that started after the metric instance was created
389 // note: don't call this routine for a process started via fork or exec, just
390 // for processes started the "normal" way.
391 // "this" is an aggregate mi, not a component one.
//
// Outline: bail out if there are no components or if p already has one; for
// MDL metrics, build a temporary aggregate mi for p alone with mdl_do(), move
// its single component into this aggregate, instrument it, and adjust the
// predicted cost.
// NOTE(review): several short lines (guards, returns, the disposal of the
// temporary mi) are not visible in this listing.
393 void metricDefinitionNode::propagateToNewProcess(process *p) {
394 unsigned comp_size = components.size();
397 return; // if there are no components, shouldn't the mi be fried?
399 for (unsigned u = 0; u < comp_size; u++) {
400 if (components[u]->proc() == p) {
401 // The metric is already enabled for this process. This case can
402 // happen when we are starting several processes at the same time.
408 bool internal = false;
410 metricDefinitionNode *mi = NULL;
411 // an aggregate (not component) mi, though we know that it'll contain just
412 // one component. It's that one component that we're really interested in.
413 if (mdl_can_do(met_)) {
414 // Make the unique ID for this metric/focus visible in MDL.
415 string vname = "$globalId";
416 mdl_env::add(vname, false, MDL_T_INT);
417 mdl_env::set(this->getMId(), vname);
// run MDL for just the one new process
419 vector<process *> vp(1,p);
420 mi = mdl_do(focus_, met_, flat_name_, vp, false);
422 // internal and cost metrics don't need to be propagated (um, is this correct?)
426 if (mi) { // successfully created new mi
427 assert(mi->components.size() == 1);
428 metricDefinitionNode *theNewComponent = mi->components[0];
// adopt the component: it now reports to this aggregate instead of "mi"
430 components += theNewComponent;
431 theNewComponent->aggregators[0] = this;
432 theNewComponent->samples[0] = aggSample.newComponent();
434 theNewComponent->insertInstrumentation();
435 theNewComponent->checkAndInstallInstrumentation();
// only ever raise the recorded cost; the increase is added to the global total
439 const float cost = mi->cost();
440 if (cost > originalCost_) {
441 currentPredictedCost += cost - originalCost_;
442 originalCost_ = cost;
// detach the adopted component from the temporary aggregate
445 mi->components.resize(0); // protect the new component
// Per-component half of exec handling (no arguments; "this" is the component
// mi of the process that exec'd).  Rebuilds the component through mdl_do()
// and swaps the new component into every aggregate that referenced the old
// one.
// NOTE(review): short lines (some mdl_do arguments, the final return, braces)
// are not visible in this listing.
450 metricDefinitionNode* metricDefinitionNode::handleExec() {
451 // called by handleExec(), below. See that routine for documentation.
452 // "this" is a component mi.
454 // If this component mi can be (re-)enabled in the new (post-exec) process, then do
455 // so. Else, remove the component mi from aggregators, etc. Returns new component
456 // mi if successful, NULL otherwise.
460 // How can we tell if the mi can be inserted into the "new" (post-exec) process?
461 // A component mi is basically a set of instReqNodes and dataReqNodes. The latter
462 // don't restrict what can be inserted (is this right?); the instReqNodes hold the
463 // key -- we should look at the functions (instPoint's) where code (whose contents
464 // are in AstNode's) would be inserted. Now clearly, the instPoint's must be
465 // checked -- if any one doesn't exist, then the instReqNode and hence the component
466 // mi doesn't belong in the post-exec process. But what about the AstNode's?
467 // Should the code that gets inserted be subject to a similar test? Probably, but
468 // we currently don't do it.
470 // BUT: Even if a process contains a function in both the pre-exec and post-exec
471 // stages, we must assume that the function is IN A DIFFERENT LOCATION IN
472 // THE ADDRESS SPACE. Ick. So the instPoint's can't be trusted and must
473 // be recalculated from scratch. In that regard, this routine is similar to
474 // propagateToNewProcess(), which propagates aggregate mi's to a brand new
475 // process (but which doesn't work for processes started via fork or exec).
476 // The lesson learned is to (ick, ick, ick) call mdl_do() all over again.
477 // This gets really confusing when you consider that a component mi can belong
478 // to several aggregate mi's (e.g. if we represent cpu time for proc 100 then
479 // we can belong to cpu/whole and cpu/proc-100); for which aggregate mi should
480 // we run mdl_do? Any will do, so we can pick arbitrarily (is this right?).
482 // QUESTION: What about internal or cost metrics??? They have aggregate and
483 // component mi's just like normal metrics, right? If that's so, then
484 // they must be propagated too! NOT YET IMPLEMENTED!!!
// the first aggregator is the one chosen to re-run mdl_do() for
486 metricDefinitionNode *aggregateMI = this->aggregators[0];
487 metricDefinitionNode *resultCompMI = NULL; // so far...
489 const bool internal = !mdl_can_do(aggregateMI->met_);
491 return NULL; // NOT YET IMPLEMENTED
493 // try to propagate the mi
494 // note: the following code is mostly stolen from propagateToNewProcess(); blame
495 // it for any bugs :)
497 // Make the unique ID for this metric/focus visible in MDL. (?)
498 string vname = "$globalId";
499 mdl_env::add(vname, false, MDL_T_INT);
500 mdl_env::set(aggregateMI->getMId(), vname);
502 vector<process*> vp(1, this->proc());
503 metricDefinitionNode *tempAggMI = mdl_do(aggregateMI->focus_,
505 aggregateMI->flat_name_,
507 true // --> fry existing component MI
509 if (tempAggMI == NULL)
510 return NULL; // failure
512 assert(tempAggMI->aggregate_);
514 // okay, it looks like we successfully created a new aggregate mi.
515 // Of course, we're just interested in the (single) component mi contained
516 // within it; it'll replace "this".
518 assert(tempAggMI->components.size() == 1);
519 resultCompMI = tempAggMI->components[0];
// drop the links to tempAggMI; they are rebuilt per real aggregator below
521 resultCompMI->aggregators.resize(0);
522 resultCompMI->samples.resize(0);
524 // For each aggregator, go back and find where "this" was a component mi.
525 // When found, replace the ptr to "this" with resultCompMI.
526 unsigned num_aggregators = aggregators.size();
527 assert(num_aggregators > 0);
528 for (unsigned agglcv=0; agglcv < num_aggregators; agglcv++) {
529 metricDefinitionNode *aggMI = aggregators[agglcv];
532 for (unsigned complcv=0; complcv < aggMI->components.size(); complcv++) {
533 if (aggMI->components[complcv] == this) {
534 aggMI->components[complcv] = resultCompMI;
// keep aggregators[] and samples[] of the new component index-aligned
536 resultCompMI->aggregators += aggMI;
537 resultCompMI->samples += aggMI->aggSample.newComponent();
// retire the old component's sample slot in this aggregator
539 aggMI->aggSample.removeComponent(this->samples[agglcv]);
548 // Now let's actually insert the instrumentation:
550 resultCompMI->insertInstrumentation();
551 resultCompMI->checkAndInstallInstrumentation();
554 // And fry "tempAggMI", but make sure "resultCompMI" isn't fried when we do so
555 tempAggMI->components.resize(0); // protect resultCompMI
556 delete tempAggMI; // good riddance; you were an ugly hack to begin with
// Process-level half of exec handling: walks a snapshot of allMIComponents
// and, for each component that belongs to "proc", calls the per-component
// handleExec().  A NULL result removes the component via
// removeThisInstance(); otherwise the old component is deleted and the
// replacement is registered in allMIComponents under the same flat name.
561 void metricDefinitionNode::handleExec(process *proc) {
562 // a static member fn.
563 // handling exec is tricky. At the time this routine is called, the "new" process
564 // has been bootstrapped and is ready for stuff to get inserted. No mi's have yet
565 // been propagated, and the data structures (allMIs, allMIComponents, etc.) are still
566 // in their old, pre-exec state, so they show component mi's enabled for this
567 // process, even though they're not (at least not yet). This routines brings things
570 // Algorithm: loop thru all component mi's for this process. If it is possible to
571 // propagate it to the "new" (post-exec) process, then do so. If not, fry the
572 // component mi. An example where a component mi can no longer fit is an mi
573 // specific to, say, function foo(), which (thanks to the exec syscall) no longer
574 // exists in this process. Note that the exec syscall changed the addr space enough
575 // so even if a given routine foo() is present in both the pre-exec and post-exec
576 // process, we must assume that it has MOVED TO A NEW LOCATION, thus making
577 // the component mi's instReqNode's instPoint out-of-date. Ick.
// iterate over a copy of the values, since the dictionary is modified below
579 vector<metricDefinitionNode*> miComponents = allMIComponents.values();
580 for (unsigned lcv=0; lcv < miComponents.size(); lcv++) {
581 metricDefinitionNode *componentMI = miComponents[lcv];
// NOTE(review): the statement controlled by this test (presumably a skip to
// the next component) is not visible in this listing.
582 if (componentMI->proc() != proc)
585 forkexec_cerr << "calling handleExec for component "
586 << componentMI->flat_name_ << endl;
588 metricDefinitionNode *replaceWithComponentMI = componentMI->handleExec();
590 if (replaceWithComponentMI == NULL) {
591 forkexec_cerr << "handleExec for component " << componentMI->flat_name_
592 << " failed, so not propagating it" << endl;
593 componentMI->removeThisInstance(); // propagation failed; fry component mi
596 forkexec_cerr << "handleExec for component " << componentMI->flat_name_
597 << " succeeded...it has been propagated" << endl;
598 // new component mi has already been inserted in place of old component mi
599 // in all of its aggregate's component lists. So, not much left to do,
600 // except to update allMIComponents.
602 assert(replaceWithComponentMI->flat_name_ == componentMI->flat_name_);
604 delete componentMI; // old component mi (dtor removes it from allMIComponents)
605 assert(!allMIComponents.defines(replaceWithComponentMI->flat_name_));
606 allMIComponents[replaceWithComponentMI->flat_name_] = replaceWithComponentMI;
611 // called when all components have been removed (because the processes have exited
612 // or exec'd) from "this". "this" is an aggregate mi.
//
// Drains aggSample: each interval obtained from aggregateValues() is sanity
// checked and handed to batchSampleData(); afterwards the batch buffer is
// flushed and tp->endOfDataCollection() is invoked with this mi's id.
// NOTE(review): the loop header around the drain (presumably testing the
// validity of "ret") is not visible in this listing.
613 void metricDefinitionNode::endOfDataCollection() {
614 assert(components.size() == 0);
616 // flush aggregateSamples
617 sampleInterval ret = aggSample.aggregateValues();
// interval must be non-empty and must not start before the first record time
620 assert(ret.end > ret.start);
621 assert(ret.start >= (firstRecordTime/MILLION));
622 assert(ret.end >= (firstRecordTime/MILLION));
623 batchSampleData(id_, ret.start, ret.end, ret.value,
624 aggSample.numComponents(),false);
// fetch the next interval
625 ret = aggSample.aggregateValues();
627 flush_batch_buffer();
628 tp->endOfDataCollection(id_);
631 // remove a component from an aggregate.
632 // "this" is an aggregate mi; "comp" is a component mi.
//
// The matching entry is deleted, its slot is refilled with the last element,
// and the vector is shrunk by one, so the order of components is not
// preserved.  Note that this routine deletes "comp" itself.
// NOTE(review): the guard in front of endOfDataCollection() (which asserts
// that no components remain) and the return that ends the match are not
// visible in this listing -- confirm.
633 void metricDefinitionNode::removeFromAggregate(metricDefinitionNode *comp) {
634 unsigned size = components.size();
635 for (unsigned u = 0; u < size; u++) {
636 if (components[u] == comp) {
637 delete components[u];
638 components[u] = NULL; // overwritten again by the next statement
639 components[u] = components[size-1];
640 components.resize(size-1);
642 endOfDataCollection();
647 // should always find the right component
651 // remove this component mi from all aggregators it is a component of.
652 // if the aggregate mi no longer has any components then fry the mi aggregate mi.
653 // called by removeFromMetricInstances, below, when a process exits (or exec's)
//
// NOTE(review): removeFromAggregate() deletes the component it is given, and
// it is given "this" here.  The loop below therefore keeps reading
// this->aggregators / this->samples after the first iteration has deleted the
// object -- confirm the intended lifetime.
654 void metricDefinitionNode::removeThisInstance() {
657 // first, remove from allMIComponents (this is new --- is it right?)
658 assert(allMIComponents.defines(flat_name_));
659 allMIComponents.undef(flat_name_);
// aggregators[] and samples[] are parallel vectors
661 assert(aggregators.size() == samples.size());
662 unsigned aggr_size = aggregators.size();
663 assert(aggr_size > 0);
665 //for (unsigned u = 0; u < aggr_size; u++) {
// the bounds are re-read on every iteration instead of using aggr_size
666 for (unsigned u = 0; u < aggregators.size() && u < samples.size(); u++) {
667 aggregators[u]->aggSample.removeComponent(samples[u]);
668 aggregators[u]->removeFromAggregate(this);
673 // Called when a process exits, to remove the component associated to proc
674 // from all metric instances. (If, after an exec, we never want to carry over
675 // mi's from the pre-exec, then this routine will work there, too. But we try to
676 // carry over mi's whenever appropriate.)
677 // Remove the aggregate metric instances that don't have any components left
678 void removeFromMetricInstances(process *proc) {
679 // Loop through all of the _component_ mi's; for each with component process
680 // of "proc", remove the component mi from its aggregate mi.
681 // Note: imho, there should be a *per-process* vector of mi-components.
// work on a copy of the values: removeThisInstance() edits allMIComponents
683 vector<metricDefinitionNode *> MIs = allMIComponents.values();
684 for (unsigned j = 0; j < MIs.size(); j++) {
685 if (MIs[j]->proc() == proc)
686 MIs[j]->removeThisInstance();
// cost metrics keep their own per-process state
688 costMetric::removeProcessFromAll(proc); // what about internal metrics?
691 /* *************************************************************************** */
693 // obligatory definition of static member variable:
// Id handed to each new dataReqNode; the add*Counter(), add*Timer() and
// forkProcess() routines in this file increment it.
694 int metricDefinitionNode::counterId=0;
// Adds a sampled (reported) int counter, initialised to "initialValue", to
// this mi's dataRequests.  The node is created with the current counterId,
// which is then incremented and mirrored into internalMetricCounterId.
// NOTE(review): the two allocations below are presumably alternatives chosen
// by a preprocessor conditional that is not visible in this listing; the
// final return is not visible either.
696 dataReqNode *metricDefinitionNode::addSampledIntCounter(int initialValue) {
697 dataReqNode *result=NULL;
700 // shared memory sampling of a reported intCounter
701 result = new sampledShmIntCounterReqNode(initialValue,
702 metricDefinitionNode::counterId);
703 // implicit conversion to base class
705 // non-shared-memory sampling of a reported intCounter
706 result = new sampledIntCounterReqNode(initialValue,
707 metricDefinitionNode::counterId);
708 // implicit conversion to base class
713 metricDefinitionNode::counterId++;
715 internalMetricCounterId = metricDefinitionNode::counterId;
717 dataRequests += result;
// Adds a non-sampled int counter, initialised to "initialValue", to this
// mi's dataRequests.  The node is created with the current counterId, which
// is then incremented and mirrored into internalMetricCounterId.
// NOTE(review): the final return is not visible in this listing.
721 dataReqNode *metricDefinitionNode::addUnSampledIntCounter(int initialValue) {
722 // sampling of a non-reported intCounter (probably just a predicate)
723 // NOTE: In the future, we should probably put un-sampled intcounters
724 // into shared-memory when SHM_SAMPLING is defined. After all, the shared
725 // memory heap is faster.
726 dataReqNode *result = new nonSampledIntCounterReqNode
727 (initialValue, metricDefinitionNode::counterId);
728 // implicit conversion to base class
731 metricDefinitionNode::counterId++;
733 internalMetricCounterId = metricDefinitionNode::counterId;
735 dataRequests += result;
// Adds a wall-clock timer to this mi's dataRequests.  The node is created
// with the current counterId, which is then incremented and mirrored into
// internalMetricCounterId.
// NOTE(review): the two allocations below are presumably alternatives chosen
// by a preprocessor conditional that is not visible in this listing; the
// final return is not visible either.
739 dataReqNode *metricDefinitionNode::addWallTimer() {
740 dataReqNode *result = NULL;
// shared-memory flavour
743 result = new sampledShmWallTimerReqNode(metricDefinitionNode::counterId);
744 // implicit conversion to base class
// generic sampled timer, tagged wallTime
746 result = new sampledTimerReqNode(wallTime, metricDefinitionNode::counterId);
747 // implicit conversion to base class
752 metricDefinitionNode::counterId++;
754 internalMetricCounterId = metricDefinitionNode::counterId;
756 dataRequests += result;
// Adds a process-time timer to this mi's dataRequests.  The node is created
// with the current counterId, which is then incremented and mirrored into
// internalMetricCounterId.
// NOTE(review): the two allocations below are presumably alternatives chosen
// by a preprocessor conditional that is not visible in this listing; the
// final return is not visible either.
760 dataReqNode *metricDefinitionNode::addProcessTimer() {
761 dataReqNode *result = NULL;
// shared-memory flavour
764 result = new sampledShmProcTimerReqNode(metricDefinitionNode::counterId);
765 // implicit conversion to base class
// generic sampled timer, tagged processTime
767 result = new sampledTimerReqNode(processTime, metricDefinitionNode::counterId);
768 // implicit conversion to base class
773 metricDefinitionNode::counterId++;
775 internalMetricCounterId = metricDefinitionNode::counterId;
777 dataRequests += result;
781 /* *************************************************************************** */
783 // called when a process forks (by handleFork(), below). "this" is a (component)
784 // mi in the parent process. Duplicate it for the child, with appropriate
785 // changes (i.e. the pid of the component focus name differs), and return the newly
786 // created child mi. "map" maps all instInstance's of the parent to those copied into
789 // Note how beautifully everything falls into place. Consider the case of alarm
790 // sampling with cpu/whole program. Then comes the fork. The parent process has
791 // (0) a tTimer structure allocated in a specific location in the inferior heap,
792 // (1) instrumentation @ main to call startTimer on that ptr, (2) instrumentation in
793 // DYNINSTsampleValues() to call DYNINSTreportTimer on that ptr.
794 // The child process of fork will have ALL of these things in the exact same locations,
795 // which is correct. We want the timer to be located in the same spot; we want
796 // DYNINSTreportTimer to be called on the same pointer; and main() hasn't moved.
798 // So there's not much to do here. We create a new component mi (with same flat name
799 // as in the parent, except for a different pid), and call "forkProcess" for all
800 // dataReqNodes and instReqNodes, none of which have to do anything titanic.
//
// NOTE(review): short lines (the last constructor argument, the statement
// setting foundProcess, the final return, braces) are not visible in this
// listing.
802 metricDefinitionNode *metricDefinitionNode::forkProcess(process *child,
803 const dictionary_hash<instInstance*,instInstance*> &map) const {
804 // The "focus_" member vrble stays the same, because it was always for the
805 // metric as a whole, and not for some component.
807 // But two things must change, because they were component-specific (and the
808 // component has changed processes):
810 // (2) the component focus (not to be confused with plain focus_)
812 // For example, instead of
813 // "/Code/foo.c/myfunc, /Process/100, ...", we should have
814 // "/Code/foo.c/myfunc, /Process/101, ...", because the pid of the child
815 // differs from that of the parent.
817 // The resource structure of a given process is found in the "rid"
818 // field of class process.
819 const resource *parentResource = child->getParent()->rid;
820 const string &parentPartName = parentResource->part_name();
822 const resource *childResource = child->rid;
823 const string &childPartName = childResource->part_name();
// start from a copy of the parent's component focus and patch the process part
825 vector< vector<string> > newComponentFocus = this->component_focus;
826 // we'll change the process, but not the machine name.
827 bool foundProcess = false;
829 for (unsigned hier=0; hier < component_focus.size(); hier++) {
830 if (component_focus[hier][0] == "Process") {
832 assert(component_focus[hier].size() == 2);
833 // since a component focus is by definition specific to some process
835 assert(component_focus[hier][1] == parentPartName);
837 // change the process:
838 newComponentFocus[hier][1] = childPartName;
842 assert(foundProcess);
// the flat name is derived from the patched focus, so it differs from the parent's
844 string newComponentFlatName = metricAndCanonFocus2FlatName(met_, newComponentFocus);
846 metricDefinitionNode *mi =
847 new metricDefinitionNode(child,
848 met_, // metric name doesn't change
849 focus_, // focus doesn't change (tho component focus will)
850 newComponentFocus, // this is a change
851 newComponentFlatName, // this is a change
856 metricDefinitionNode::counterId++;
858 forkexec_cerr << "metricDefinitionNode::forkProcess -- component flat name for parent is " << flat_name_ << "; for child is " << mi->flat_name_ << endl;
860 internalMetricCounterId = metricDefinitionNode::counterId;
// register the child's component under its new flat name
862 assert(!allMIComponents.defines(newComponentFlatName));
863 allMIComponents[newComponentFlatName] = mi;
865 // Duplicate the dataReqNodes:
866 for (unsigned u = 0; u < dataRequests.size(); u++) {
867 // must add to midToMiMap[] before dup() to avoid some assert fails
868 const int newCounterId = metricDefinitionNode::counterId++;
869 // no relation to mi->getMId();
870 forkexec_cerr << "forked dataReqNode going into midToMiMap with id " << newCounterId << endl;
871 assert(!midToMiMap.defines(newCounterId));
872 midToMiMap[newCounterId] = mi;
874 dataReqNode *newNode = dataRequests[u]->dup(child, mi, newCounterId, map);
875 // remember, dup() is a virtual fn, so the right dup() and hence the
876 // right fork-ctor is called.
879 mi->dataRequests += newNode;
882 // Duplicate the instReqNodes:
883 for (unsigned u = 0; u < instRequests.size(); u++) {
884 mi->instRequests += instReqNode::forkProcess(instRequests[u], map);
// the child mi is flagged as inserted without any insertion call here
887 mi->inserted_ = true;
// Undo, in the child process, instrumentation that fork copied from the
// parent.
//   map                -- parent instInstance -> child instInstance
//   unForkInstRequests -- when true, unFork every entry of instRequests
//   unForkDataRequests -- when true, unFork every entry of dataRequests
// Any individual unFork() failure sets the result to false; the remaining
// entries are still processed.
// NOTE(review): the declaration/initialisation of "result" and the final
// return are not visible in this listing.
892 bool metricDefinitionNode::unFork(dictionary_hash<instInstance*, instInstance*> &map,
893 bool unForkInstRequests,
894 bool unForkDataRequests) {
895 // see below handleFork() for explanation of why this routine is needed.
896 // "this" is a component mi for the parent process; we need to remove copied
897 // instrumentation from the _child_ process.
898 // Returns true iff the instrumentation was removed in the child (would be false
899 // if it's not safe to remove the instrumentation in the child because it was
902 // "map" maps instInstances from the parent process to instInstances in the child
905 // We loop thru the instReqNodes of the parent process, unforking each.
906 // In addition, we need to unfork the dataReqNodes, because the alarm-sampled
907 // ones instrument DYNINSTsampleValues.
911 if (unForkInstRequests)
912 for (unsigned lcv=0; lcv < instRequests.size(); lcv++)
913 if (!instRequests[lcv].unFork(map))
914 result = false; // failure
916 if (unForkDataRequests)
917 for (unsigned lcv=0; lcv < dataRequests.size(); lcv++)
918 if (!dataRequests[lcv]->unFork(map))
919 result = false; // failure
925 // called by forkProcess of context.C, just after the fork-constructor was
926 // called for the child process.
// For every component mi in allMIComponents this routine either
//   (a) duplicates the component into 'child' with forkProcess() and attaches the
//       copy to each aggregator that is not process-specific, or
//   (b) calls unFork() to delete the residue that fork() left in the child.
// Parameters:
//   parent - the process that forked; compared against comp->proc()
//   child  - the new process; receives the duplicated components
//   map    - parent instInstance* -> child instInstance*
927 void metricDefinitionNode::handleFork(const process *parent, process *child,
928 dictionary_hash<instInstance*,instInstance*> &map) {
929 // "map" defines a mapping from all instInstance's of the parent process to
930 // the copied one in the child process. Some of the child process's ones may
931 // get fried by this routine, as it detects that instrumentation has been copied
932 // (by the fork syscall, which we have no control over) which doesn't belong in
933 // the child process and therefore gets deleted manually.
935 // Remember that a given component can be shared by multiple aggregator-mi's,
936 // so be careful about duplicating a component twice. Since we loop through
937 // component mi's instead of aggregate mi's, it's no problem. Note that it's
938 // possible that only a subset of a component-mi's aggregators should get the newly
939 // created child component mi.
// Work on a snapshot of the component list taken before the loop starts.
941 vector<metricDefinitionNode *> allComponents = allMIComponents.values();
942 for (unsigned complcv=0; complcv < allComponents.size(); complcv++) {
943 metricDefinitionNode *comp = allComponents[complcv];
945 // duplicate the component (create a new one) if it belongs in the
946 // child process. It belongs if any of its aggregate mi's should be
947 // propagated to the child process. An aggregate mi should be propagated
948 // if it wasn't refined to some process.
950 bool shouldBePropagated = false; // so far
951 bool shouldBeUnforkedIfNotPropagated = false; // so far
952 assert(comp->aggregators.size() > 0);
953 for (unsigned agglcv=0; agglcv < comp->aggregators.size(); agglcv++) {
954 metricDefinitionNode *aggMI = comp->aggregators[agglcv];
// A process-focus of size 1 is this file's test for "not refined to a process".
956 if (aggMI->focus_[resource::process].size() == 1) {
957 // wasn't specific to any process
958 shouldBeUnforkedIfNotPropagated = false; // we'll definitely be using it
959 shouldBePropagated = true;
962 else if (comp->proc() == parent)
963 // was specific to parent process, so fork() copied it into the child,
964 // unless it was an internal or cost metric, in which case there was nothing
966 if (!internalMetric::isInternalMetric(aggMI->getMetName()) &&
967 !costMetric::isCostMetric(aggMI->getMetName()))
968 shouldBeUnforkedIfNotPropagated = true;
// NOTE: a later aggregator can set this flag again after the branch above
// cleared it; the test below also requires !shouldBePropagated, so a
// propagated component is never unforked.
970 // was specific to other process, so nothing is in the child for it yet
974 if (!shouldBePropagated && shouldBeUnforkedIfNotPropagated) {
975 // this component mi isn't gonna be propagated to the child process, but
976 // the fork syscall left some residue in the child. Delete that residue now.
977 assert(comp->proc() == parent);
// NOTE(review): unFork() returns bool; the result is ignored here.
978 comp->unFork(map, true, true); // also modifies 'map' to remove items
981 if (!shouldBePropagated)
984 // Okay, it's time to propagate this component mi to the subset of its aggregate
985 // mi's which weren't refined to a specific process. If we've gotten to this
986 // point, then there _is_ at least one such aggregate.
987 assert(shouldBePropagated);
988 metricDefinitionNode *newComp = comp->forkProcess(child, map);
989 // copies instr (well, fork() does this for us), allocs ctr/timer space,
990 // initializes. Basically, copies dataReqNode's and instReqNode's.
// Second pass over the aggregators: link newComp to each one that is not
// process-specific, keeping components/aggregators/samples in step.
992 bool foundAgg = false;
993 for (unsigned agglcv=0; agglcv < comp->aggregators.size(); agglcv++) {
994 metricDefinitionNode *aggMI = comp->aggregators[agglcv];
995 if (aggMI->focus_[resource::process].size() == 1) {
996 // this aggregate mi wasn't specific to any process, so it gets the new
998 aggMI->components += newComp;
999 newComp->aggregators += aggMI;
1000 newComp->samples += aggMI->aggSample.newComponent();
// Asks each entry of 'components' and each entry of 'instRequests' whether it has
// instrumentation that must be triggered by hand.
// NOTE(review): presumably the first loop serves aggregate mi's, the second serves
// component mi's, and a positive answer returns true immediately -- confirm.
1008 bool metricDefinitionNode::anythingToManuallyTrigger() const {
1010 for (unsigned i=0; i < components.size(); i++)
1011 if (components[i]->anythingToManuallyTrigger())
1016 for (unsigned i=0; i < instRequests.size(); i++)
1017 if (instRequests[i].anythingToManuallyTrigger())
// Fires the instrumentation that has to be triggered by hand: forwards the call
// to every entry of 'components', and for every entry of 'instRequests' that
// reports anythingToManuallyTrigger() calls triggerNow(proc()).
// Precondition (asserted): anythingToManuallyTrigger() is true.
// A triggerNow() failure is reported on cerr.
1025 void metricDefinitionNode::manuallyTrigger() {
1026 assert(anythingToManuallyTrigger());
1029 for (unsigned i=0; i < components.size(); i++)
1030 components[i]->manuallyTrigger();
1033 for (unsigned i=0; i < instRequests.size(); i++)
1034 if (instRequests[i].anythingToManuallyTrigger())
1035 if (!instRequests[i].triggerNow(proc())) {
1036 cerr << "manual trigger failed for an inst request" << endl;
1042 // startCollecting is called by dynRPC::enableDataCollection (or enableDataCollection2)
1044 // startCollecting is a friend of metricDefinitionNode; can it be
1045 // made a member function of metricDefinitionNode instead?
1046 // Especially since it clearly is an integral part of the class;
1047 // in particular, it sets the crucial vrble "id_"
//
// Steps visible in this routine, in order:
//   1. publish 'id' to MDL as the int variable "$globalId";
//   2. build the metric instance with createMetricInstance() (failure is logged on cerr);
//   3. register it in allMIs under mi->id_ (asserted not to be defined yet);
//   4. store mi->cost() in originalCost_ and add it to currentPredictedCost;
//   5. pause the running processes that own a component;
//   6. insert and install the instrumentation;
//   7. manually trigger instrumentation when anythingToManuallyTrigger() says so;
//   8. increment metResPairsEnabled.
// Parameters:
//   metric_name, focus - forwarded to createMetricInstance()
//   id                 - unique id of this metric/focus pair
//   procsToCont        - per the comment in the body, collects the processes paused
//                        here; the caller continues them later
// NOTE(review): the meaning of the int return value is not stated here -- confirm with callers.
1048 int startCollecting(string& metric_name, vector<u_int>& focus, int id,
1049 vector<process *> &procsToCont)
1051 bool internal = false;
1053 // Make the unique ID for this metric/focus visible in MDL.
1054 string vname = "$globalId";
1055 mdl_env::add(vname, false, MDL_T_INT);
1056 mdl_env::set(id, vname);
1058 metricDefinitionNode *mi = createMetricInstance(metric_name, focus,
1062 cerr << "startCollecting failed because createMetricInstance failed" << endl;
1068 assert(!allMIs.defines(mi->id_));
1069 allMIs[mi->id_] = mi;
1071 const float cost = mi->cost();
1072 mi->originalCost_ = cost;
1074 currentPredictedCost += cost;
1077 // enable timing stuff: also code in insertInstrumentation()
1078 u_int start_size = test_heapsize;
1079 printf("ENABLE: %d %s %s\n",start_size,
1080 (mi->getMetName()).string_of(),
1081 (mi->getFullName()).string_of());
1082 static timer inTimer;
1089 // pause processes that are running and add them to procsToCont.
1090 // We don't rerun the processes after we insert instrumentation,
1091 // this will be done by our caller, after all instrumentation
1092 // has been inserted.
1093 for (unsigned u = 0; u < mi->components.size(); u++) {
1094 process *p = mi->components[u]->proc();
1095 if (p->status() == running && p->pause()) {
// NOTE(review): both calls below return bool; the results are discarded here.
1101 mi->insertInstrumentation(); // calls pause and unpause (this could be a bug, since the next line should be allowed to execute before the unpause!!!)
1102 mi->checkAndInstallInstrumentation();
1104 // Now that the timers and counters have been allocated on the heap, and
1105 // the instrumentation added, we can manually execute instrumentation
1106 // we may have missed at $start.entry. But has the process been paused
1107 // all this time? Hopefully so; otherwise things can get screwy.
1109 if (mi->anythingToManuallyTrigger()) {
// Only the process of the first component is consulted here.
1110 process *theProc = mi->components[0]->proc();
1113 bool alreadyRunning = (theProc->status_ == running);
1118 mi->manuallyTrigger();
1121 theProc->continueProc(); // the continue will trigger our code
1123 ; // the next time the process continues, we'll trigger our code
1129 if(!start_size) start_size = test_heapsize;
1130 printf("It took %f:user %f:system %f:wall seconds heap_left: %d used %d\n"
1131 , inTimer.usecs(), inTimer.ssecs(), inTimer.wsecs(),
1132 test_heapsize,start_size-test_heapsize);
1135 metResPairsEnabled++;
// Predicts the cost of enabling 'metric_name' for 'focus' without enabling it:
// builds a metric instance with createMetricInstance() and evaluates mi->cost().
// The instance is then checked against allMIs by its id so that an instance
// already in use is not deleted.
1139 float guessCost(string& metric_name, vector<u_int>& focus) {
1140 // called by dynrpc.C (getPredictedDataCost())
1142 metricDefinitionNode *mi = createMetricInstance(metric_name, focus, false, internal);
1144 //metric_cerr << "guessCost returning 0.0 since createMetricInstance failed" << endl;
1148 float cost = mi->cost();
1149 // delete the metric instance, if it is not being used
1150 if (!allMIs.defines(mi->getMId()))
// Inserts the instrumentation owned by this mi. Visible work:
//   - forwards to insertInstrumentation() of each entry in 'components', giving up
//     at the first failure;
//   - pauses proc_, remembering in needToCont whether it was running;
//   - has each dataReqNode insert itself and records its sample id in midToMiMap
//     (asserted not to be defined yet);
//   - has each instReqNode insert itself and appends the returnInstance to returnInsts;
//   - continues proc_.
// NOTE(review): the 'return false' exits inside the two loops are taken after
// proc_->pause() and before continueProc(), so a failure appears to leave the
// process paused even when needToCont is true -- confirm callers cope with that.
1156 bool metricDefinitionNode::insertInstrumentation()
1158 // returns true iff successful
1165 unsigned c_size = components.size();
1166 for (unsigned u=0; u<c_size; u++)
1167 if (!components[u]->insertInstrumentation())
1168 return false; // shouldn't we try to undo what's already put in?
1170 bool needToCont = proc_->status() == running;
1171 bool res = proc_->pause();
1175 // Loop thru "dataRequests", an array of (ptrs to) dataReqNode:
1176 // Here we allocate ctrs/timers in the inferior heap but don't
1177 // stick in any code, except (if appropriate) that we'll instrument the
1178 // application's alarm-handler when not shm sampling.
1179 unsigned size = dataRequests.size();
1180 for (unsigned u=0; u<size; u++) {
1181 // the following allocs an object in inferior heap and arranges for
1182 // it to be alarm sampled, if appropriate.
1183 if (!dataRequests[u]->insertInstrumentation(proc_, this))
1184 return false; // shouldn't we try to undo what's already put in?
// Map the low-level sample id back to this mi so incoming samples can be routed.
1186 unsigned mid = dataRequests[u]->getSampleId();
1187 assert(!midToMiMap.defines(mid));
1188 midToMiMap[mid] = this;
1191 // Loop thru "instRequests", an array of instReqNode:
1192 // (Here we insert code instrumentation, tramps, etc. via addInstFunc())
1193 for (unsigned u1=0; u1<instRequests.size(); u1++) {
1194 // NEW: the following may also manually trigger the instrumentation
1196 returnInstance *retInst=NULL;
1197 if (!instRequests[u1].insertInstrumentation(proc_, retInst))
1198 return false; // shouldn't we try to undo what's already put in?
1201 returnInsts += retInst;
1205 proc_->continueProc();
// Installs the returnInstances gathered in returnInsts, delaying those that
// checkReturnInstance() reports as unsafe for the current call stack.
// Visible work:
//   - returns true at once when installed_ is already set;
//   - forwards to checkAndInstallInstrumentation() of each entry in 'components';
//   - pauses proc_ (a failed pause is reported on cerr) and walks its stack;
//   - installs every returnInstance that is safe now and flags the others in delay_elm;
//   - puts the delayed ones on the return-waiting list for the pc found at
//     stack index max_index+1;
//   - continues proc_ when it was running on entry.
1211 bool metricDefinitionNode::checkAndInstallInstrumentation() {
1212 bool needToCont = false;
1214 if (installed_) return(true);
1219 unsigned c_size = components.size();
1220 for (unsigned u=0; u<c_size; u++)
1221 components[u]->checkAndInstallInstrumentation();
1223 needToCont = proc_->status() == running;
1224 if (!proc_->pause()) {
1225 cerr << "checkAnd... pause failed" << endl; cerr.flush();
1229 vector<Address> pc = proc_->walkStack();
1231 // for(u_int i=0; i < pc.size(); i++){
1232 // printf("frame %d: pc = 0x%x\n",i,pc[i]);
1235 unsigned rsize = returnInsts.size();
1236 u_int max_index = 0; // first frame where it is safe to install instr
1237 bool delay_install = false; // true if some instr. needs to be delayed
1238 vector<bool> delay_elm(rsize); // wch instr. to delay
1239 // for each inst point walk the stack to determine if it can be
1240 // inserted now (it can if it is not currently on the stack)
1241 // If some can not be inserted, then find the first safe point on
1242 // the stack where all can be inserted, and set a break point
1243 for (unsigned u=0; u<rsize; u++) {
1245 bool installSafe = returnInsts[u] -> checkReturnInstance(pc,index);
// Track the largest stack index reported by any unsafe instance.
1246 if ((!installSafe) && (index > max_index)) max_index = index;
1249 //cerr << "installSafe!" << endl;
1250 returnInsts[u] -> installReturnInstance(proc_);
1251 delay_elm[u] = false;
1253 delay_install = true;
1254 delay_elm[u] = true;
1258 // get rid of pathological cases...caused by threaded applications
1259 // TODO: this should be fixed to do something smarter
// The two tests below look at pc[max_index+1]: first whether that index is past
// the end of the walked stack, then whether the pc stored there is 0.
1260 if((max_index > 0) && ((max_index+1) >= pc.size())){
1262 //printf("max_index changed: %d\n",max_index);
1264 if((max_index > 0) && (pc[max_index+1] == 0)){
1266 //printf("max_index changed: %d\n",max_index);
1268 Address pc2 = pc[max_index+1];
1269 for(u_int i=0; i < rsize; i++){
1271 returnInsts[i]->addToReturnWaitingList(pc2, proc_);
1276 if (needToCont) proc_->continueProc();
// Estimates the cost of this mi.
//   - over 'components': keeps the largest component cost (maximum, not sum);
//   - over 'instRequests': adds up instRequests[u].cost(proc_).
// NOTE(review): presumably 'ret' starts at 0 and is the value returned -- confirm.
1281 float metricDefinitionNode::cost() const
1285 unsigned c_size = components.size();
1286 for (unsigned u=0; u<c_size; u++) {
1287 float nc = components[u]->cost();
1288 if (nc > ret) ret = nc;
1291 for (unsigned u=0; u<instRequests.size(); u++)
1292 ret += instRequests[u].cost(proc_);
// Turns this mi off. Visible work:
//   - offers 'this' to every internal metric via disableByMetricDefinitionNode();
//   - disables the cost metric whose 'node' is this mi;
//   - does nothing further when inserted_ is false;
//   - for each component: removes 'this' from the component's aggregators/samples
//     (swap with the last element, then shrink by one) and treats aggr_size == 1
//     as "the component was not shared";
//   - disables every instReqNode, collecting the tramps at each point in pointsToCheck;
//   - disables every dataReqNode and removes its sample id from midToMiMap.
1297 void metricDefinitionNode::disable()
1299 // check for internal metrics
1301 unsigned ai_size = internalMetric::allInternalMetrics.size();
1302 for (unsigned u=0; u<ai_size; u++) {
1303 internalMetric *theIMetric = internalMetric::allInternalMetrics[u];
1304 if (theIMetric->disableByMetricDefinitionNode(this)) {
1305 //logLine("disabled internal metric\n");
1310 // check for cost metrics
1311 for (unsigned i=0; i<costMetric::allCostMetrics.size(); i++){
1312 if (costMetric::allCostMetrics[i]->node == this) {
1313 costMetric::allCostMetrics[i]->disable();
1314 //logLine("disabled cost metric\n");
1318 if (!inserted_) return;
1322 /* disable components of aggregate metrics */
1323 // unsigned c_size = components.size();
1324 //for (unsigned u=0; u<c_size; u++) {
1325 for (unsigned u=0; u<components.size(); u++) {
1326 //components[u]->disable();
1327 metricDefinitionNode *m = components[u];
1328 unsigned aggr_size = m->aggregators.size();
// aggregators and samples are parallel vectors; they must stay the same length.
1329 assert(aggr_size == m->samples.size());
// NOTE(review): aggr_size is the size before removal, so after the resize below
// the loop bound is stale -- presumably the loop is left right after the match; confirm.
1330 for (unsigned u1=0; u1 < aggr_size; u1++) {
1331 if (m->aggregators[u1] == this) {
1332 m->aggregators[u1] = m->aggregators[aggr_size-1];
1333 m->aggregators.resize(aggr_size-1);
1334 m->samples[u1] = m->samples[aggr_size-1];
1335 m->samples.resize(aggr_size-1);
// Exactly one entry must have been removed.
1339 assert(m->aggregators.size() == aggr_size-1);
1340 // disable component only if it is not being shared
1341 if (aggr_size == 1) {
1347 vector<unsigVecType> pointsToCheck;
1348 for (unsigned u1=0; u1<instRequests.size(); u1++) {
1349 unsigVecType pointsForThisRequest =
1350 getAllTrampsAtPoint(instRequests[u1].getInstance());
1351 pointsToCheck += pointsForThisRequest;
1353 instRequests[u1].disable(pointsForThisRequest); // calls deleteInst()
1356 for (unsigned u=0; u<dataRequests.size(); u++) {
// Read the sample id before disabling the node, then drop its map entry.
1357 unsigned mid = dataRequests[u]->getSampleId();
1358 dataRequests[u]->disable(proc_, pointsToCheck); // deinstrument
1359 assert(midToMiMap.defines(mid));
1360 midToMiMap.undef(mid);
// Detaches 'this' from the aggregator list of component 'comp'.
//   comp - a component mi (asserted: comp->aggregate_ is false)
// The matching slot in comp->aggregators and the parallel slot in comp->samples
// are overwritten with the last element and both vectors shrink by one.
// aggr_size holds the size before removal, so 'aggr_size == 1' at the end means
// 'this' was the component's only aggregator.
1365 void metricDefinitionNode::removeComponent(metricDefinitionNode *comp) {
1366 assert(!comp->aggregate_);
1367 unsigned aggr_size = comp->aggregators.size();
// 'found' == aggr_size encodes "not found yet".
1368 unsigned found = aggr_size;
1370 if (aggr_size == 0) {
1375 // component has more than one aggregator. Remove this from list of aggregators
1376 for (unsigned u = 0; u < aggr_size; u++) {
1377 if (comp->aggregators[u] == this) {
1382 if (found == aggr_size)
1384 assert(found < aggr_size);
1385 assert(aggr_size == comp->samples.size());
1386 comp->aggregators[found] = comp->aggregators[aggr_size-1];
1387 comp->aggregators.resize(aggr_size-1);
1388 comp->samples[found] = comp->samples[aggr_size-1];
1389 comp->samples.resize(aggr_size-1);
1391 if (aggr_size == 1) {
// Destructor. Visible work:
//   - calls removeComponent() on every entry of 'components' and empties the vector
//     (components are detached, not deleted directly -- see the commented-out delete);
//   - removes flat_name_ from allMIComponents;
//   - deletes every dataReqNode held in dataRequests.
1398 metricDefinitionNode::~metricDefinitionNode()
1401 /* delete components of aggregate metrics */
1402 unsigned c_size = components.size();
1403 for (unsigned u=0; u<c_size; u++)
1404 removeComponent(components[u]);
1405 //delete components[u];
1406 components.resize(0);
1408 allMIComponents.undef(flat_name_);
1409 unsigned size = dataRequests.size();
1410 for (unsigned u=0; u<size; u++)
1411 delete dataRequests[u];
1416 // NOTE: This stuff (flush_batch_buffer() and batchSampleData()) belongs
1417 // in perfStream.C; this is an inappropriate file.
1419 //////////////////////////////////////////////////////////////////////////////
1420 // Buffer the samples before we actually send it //
1421 // Send it when the buffers are full //
1422 // or, send it when the last sample in the interval has arrived. //
1423 //////////////////////////////////////////////////////////////////////////////
// Capacity of the batch buffer, in entries: as many batch_buffer_entry objects
// as fit in 1024 bytes.
1425 const unsigned SAMPLE_BUFFER_SIZE = (1*1024)/sizeof(T_dyninstRPC::batch_buffer_entry);
1426 bool BURST_HAS_COMPLETED = false;
1427 // set to true after a burst (after a processTraceStream(), or sampleNodes for
1428 // the CM5), which will force the buffer to be flushed before it fills up
1429 // (if not, we'd have bad response time)
// Fixed-size staging area for samples; batch_buffer_next is the index of the
// next free slot, i.e. the number of entries currently buffered.
1431 vector<T_dyninstRPC::batch_buffer_entry> theBatchBuffer (SAMPLE_BUFFER_SIZE);
1432 unsigned int batch_buffer_next=0;
1434 // The following routines (flush_batch_buffer() and batchSampleData()) are
1435 // in an inappropriate src file; move them somewhere more appropriate.
// Sends the buffered samples to the front end and empties the buffer.
// The first batch_buffer_next entries of theBatchBuffer are copied into a vector
// of exactly that size, which is passed to tp->batchSampleDataCallbackFunc().
// Afterwards BURST_HAS_COMPLETED is cleared and batch_buffer_next is reset to 0.
1436 void flush_batch_buffer() {
1437 // don't need to flush if the batch had no data (this does happen; see
1439 if (batch_buffer_next == 0)
1442 // alloc buffer of the exact size to make communication
1443 // more efficient. Why don't we send theBatchBuffer with a count?
1444 // This would work but would always (in the igen call) copy the entire
1445 // vector. This solution has the downside of calling new but is not too bad
1447 vector<T_dyninstRPC::batch_buffer_entry> copyBatchBuffer(batch_buffer_next);
1448 assert(copyBatchBuffer.size() <= theBatchBuffer.size());
1449 for (unsigned i=0; i< batch_buffer_next; i++) {
1450 copyBatchBuffer[i] = theBatchBuffer[i];
1455 t1=getCurrentTime(false);
1458 // Now let's do the actual igen call!
1459 tp->batchSampleDataCallbackFunc(0, copyBatchBuffer);
1462 t2=getCurrentTime(false);
// Timing diagnostic: report calls that took longer than 15 seconds.
// NOTE(review): "size=%d" is paired with a sizeof expression, which has type
// size_t, not int -- the format and the argument do not match.
1463 if ((float)(t2-t1) > 15.0) {
1464 sprintf(errorLine,"++--++ TEST ++--++ batchSampleDataCallbackFunc took %5.2f secs, size=%d, Kbytes=%5.2f\n",(float)(t2-t1),sizeof(T_dyninstRPC::batch_buffer_entry),(float)(sizeof(T_dyninstRPC::batch_buffer_entry)*copyBatchBuffer.size()/1024.0));
1469 BURST_HAS_COMPLETED = false;
1470 batch_buffer_next = 0;
// Queues one sample in theBatchBuffer for a later batched send.
//   mid                          - id of the metric instance the sample belongs to
//   startTimeStamp, endTimeStamp - interval covered by the sample
//   value                        - sample value
//   val_weight                   - stored in the entry's 'weight' field
//   internal_metric              - stored in the entry's 'internal_met' field
// The buffer is flushed first when it is full or when BURST_HAS_COMPLETED is set,
// so the slot at batch_buffer_next is free by the time it is written.
1473 void batchSampleData(int mid, double startTimeStamp,
1474 double endTimeStamp, double value, unsigned val_weight,
1475 bool internal_metric)
1477 // This routine is called where we used to call tp->sampleDataCallbackFunc.
1478 // We buffer things up and eventually call tp->batchSampleDataCallbackFunc
1481 char myLogBuffer[120] ;
1482 sprintf(myLogBuffer, "mid %d, value %g\n", mid, value) ;
1483 logLine(myLogBuffer) ;
1486 // Flush the buffer if (1) it is full, or (2) for good response time, after
1488 if (batch_buffer_next >= SAMPLE_BUFFER_SIZE || BURST_HAS_COMPLETED)
1489 flush_batch_buffer();
1491 // Now let's batch this entry.
1492 T_dyninstRPC::batch_buffer_entry &theEntry = theBatchBuffer[batch_buffer_next];
1494 theEntry.startTimeStamp = startTimeStamp;
1495 theEntry.endTimeStamp = endTimeStamp;
1496 theEntry.value = value;
1497 theEntry.weight = val_weight;
1498 theEntry.internal_met = internal_metric;
1499 batch_buffer_next++;
// Hands one already-computed sample for this mi (id_) to batchSampleData().
// Asserted before forwarding: the interval is non-empty (end > start) and neither
// bound lies before firstRecordTime/MILLION; 'start' gets a 0.000001 tolerance.
1502 void metricDefinitionNode::forwardSimpleValue(timeStamp start, timeStamp end,
1503 sampleValue value, unsigned weight,
1507 assert(start + 0.000001 >= (firstRecordTime/MILLION));
1508 assert(end >= (firstRecordTime/MILLION));
1509 assert(end > start);
1511 batchSampleData(id_, start, end, value, weight, internal_met);
// Feeds a new raw sample into every aggregator of this component.
//   wallTime - sample time; divided by 1000000.0 to obtain sampleTime
//   value    - sample value, asserted to be >= -0.01
// For EventCounter metrics the incoming value is cumulative: it is turned into a
// delta against cumulativeValue, and cumulativeValue is advanced by that delta.
// Each entry of 'samples' then receives the value (or has its start time set when
// it has not received a first value yet) and the matching aggregator is updated.
1514 void metricDefinitionNode::updateValue(time64 wallTime,
1517 timeStamp sampleTime = wallTime / 1000000.0;
1518 // note: we can probably do integer division by million quicker
1520 assert(value >= -0.01);
1522 // TODO -- is this ok?
1523 // TODO -- do sampledFuncs work ?
1524 if (style_ == EventCounter) {
1526 // only use delta from last sample.
// A value below the running total is tolerated only when it is within rounding
// distance; a larger drop trips the assert.
1527 if (value < cumulativeValue) {
1528 if ((value/cumulativeValue) < 0.99999) {
1529 assert((value + 0.0001) >= cumulativeValue);
1531 // floating point rounding error ignore
1532 cumulativeValue = value;
1536 // if (value + 0.0001 < cumulativeValue)
1537 // printf ("WARNING: sample went backwards!!!!!\n");
// After these two statements 'value' is the delta and cumulativeValue equals
// the incoming cumulative value.
1538 value -= cumulativeValue;
1539 cumulativeValue += value;
1543 // If style==EventCounter then value is changed. Otherwise, it keeps the
1544 // the current "value" (e.g. SampledFunction case). That's why it is not
1545 // necessary to have an special case for SampledFunction.
1548 assert(samples.size() == aggregators.size());
1549 for (unsigned u = 0; u < samples.size(); u++) {
1550 if (samples[u]->firstValueReceived())
1551 samples[u]->newValue(sampleTime, value);
1553 samples[u]->startTime(sampleTime);
1555 aggregators[u]->updateAggregateComponent();
// Asks aggSample to aggregate the component values and, when an interval is
// produced, batches it under id_ with aggSample.numComponents() as the weight
// and 'false' for the internal-metric flag.
// The interval is asserted to be non-empty and not earlier than
// firstRecordTime/MILLION before it is batched.
1559 void metricDefinitionNode::updateAggregateComponent()
1561 // currently called (only) by the above routine
1562 sampleInterval ret = aggSample.aggregateValues();
1564 assert(ret.end > ret.start);
1565 assert(ret.start + 0.000001 >= (firstRecordTime/MILLION));
1566 assert(ret.end >= (firstRecordTime/MILLION));
1567 batchSampleData(id_, ret.start, ret.end, ret.value,
1568 aggSample.numComponents(),false);
1571 // metric_cerr << "sorry, ret.valid false so not batching sample data" << endl;
1576 // Costs are now reported to paradyn like other metrics (ie. we are not
1577 // calling reportInternalMetrics to deliver cost values, instead we wait
1578 // until we have received a new interval of cost data from each process)
1579 // note: this only works for the CM5 because all cost metrics are sumed
1580 // at the daemons and at paradyn, otherwise the CM5 needs its own version
1581 // of this routine that uses the same aggregate method as the one for paradyn
1583 #ifndef SHM_SAMPLING
// Updates the three cost metrics from one cost record of process 'proc'.
//   proc - process the record came from
//   h    - trace header; h->wall and h->process are divided by 1000000.0
//   s    - cost record; s->obsCostIdeal feeds the observed-cost metrics
// Predicted cost: the process time elapsed since the last sample is scaled by
// 1/(1+currentPredictedCost) to get the uninstrumented share, and
// currentPredictedCost times that share is added to the cumulative value.
1584 void processCost(process *proc, traceHeader *h, costUpdate *s)
1586 // we can probably do integer division by million quicker.
1587 timeStamp newSampleTime = (h->wall / 1000000.0);
1588 timeStamp newProcessTime = (h->process / 1000000.0);
1590 timeStamp lastProcessTime =
1591 totalPredictedCost->getLastSampleProcessTime(proc);
1593 // find the portion of uninstrumented time for this interval
1594 double unInstTime = ((newProcessTime - lastProcessTime)
1595 / (1+currentPredictedCost));
1596 // update predicted cost
1597 // note: currentPredictedCost is the same for all processes
1598 // this should be changed to be computed on a per process basis
1599 sampleValue newPredCost = totalPredictedCost->getCumulativeValue(proc);
1600 newPredCost += (float)(currentPredictedCost*unInstTime);
1601 totalPredictedCost->updateValue(proc,newPredCost,
1602 newSampleTime,newProcessTime);
1603 // update observed cost
1604 observed_cost->updateValue(proc,s->obsCostIdeal,
1605 newSampleTime,newProcessTime);
1607 // update smooth observed cost
1608 smooth_obs_cost->updateSmoothValue(proc,s->obsCostIdeal,
1609 newSampleTime,newProcessTime);
1613 #ifndef SHM_SAMPLING
// Routes one sample record to the metricDefinitionNode registered for its id.
//   (first parameter) - pid; unnamed because it is not used
//   h                 - trace header; h->wall is the sample's wall time
//   s                 - the sample; s->id.id is the low-level counter id, s->value the value
// An id that is not present in midToMiMap is reported on metric_cerr and the
// sample is discarded; otherwise the sample goes to mi->updateValue().
1614 void processSample(int /* pid */, traceHeader *h, traceSample *s)
1616 // called from processTraceStream (perfStream.C) when a TR_SAMPLE record
1617 // has arrived from the appl.
1619 unsigned mid = s->id.id; // low-level counterId (see primitives.C)
// Function-local statics: firstWall keeps a wall time taken from a header.
1621 static long long firstWall = 0;
1623 static bool firstTime = true;
1626 firstWall = h->wall;
1629 metricDefinitionNode *mi; // filled in by find() if found
1630 if (!midToMiMap.find(mid, mi)) { // low-level counterId to metricDefinitionNode
1631 metric_cerr << "TR_SAMPLE id " << s->id.id << " not for valid mi...discarding" << endl;
1635 // metric_cerr << "FROM pid " << pid << " got value " << s->value << " for id " << s->id.id << endl;
1637 // sprintf(errorLine, "sample id %d at time %8.6f = %f\n", s->id.id,
1638 // ((double) *(int*) &h->wall) + (*(((int*) &h->wall)+1))/1000000.0, s->value);
1639 // logLine(errorLine);
1640 mi->updateValue(h->wall, s->value);
1646 * functions to operate on inst request graph.
// Builds an instrumentation request; nothing is inserted into the process yet.
// 'instance' starts as NULL, 'ast' is taken from iAst through assignAst(), and
// iManuallyTrigger is remembered in 'manuallyTrigger'.
1649 instReqNode::instReqNode(instPoint *iPoint,
1652 callOrder o, bool iManuallyTrigger) {
1656 instance = NULL; // set when insertInstrumentation() calls addInstFunc()
1657 ast = assignAst(iAst);
1658 manuallyTrigger = iManuallyTrigger;
// Produces the child-process counterpart of 'parentNode' after a fork.
//   parentNode - request node of the parent process
//   map        - parent instInstance* -> child instInstance*
// The copy reuses the parent's point, ast and 'when', is never manually
// triggered, and gets its 'instance' by looking up parentNode.instance in 'map'.
1662 instReqNode instReqNode::forkProcess(const instReqNode &parentNode,
1663 const dictionary_hash<instInstance*,instInstance*> &map) {
1664 instReqNode ret = instReqNode(parentNode.point, parentNode.ast, parentNode.when,
1666 false // don't manually trigger
1669 if (!map.find(parentNode.instance, ret.instance)) // writes to ret.instance
// Deletes, in the child process, the instInstance that corresponds to this
// parent-process request.
//   map - parent instInstance* -> child instInstance*; the entry for this node's
//         instance is overwritten with NULL once the child instance is deleted
// Returns true on success.
1675 bool instReqNode::unFork(dictionary_hash<instInstance*,instInstance*> &map) const {
1676 // The fork syscall duplicates all trampolines from the parent into the child. For
1677 // those mi's which we don't want to propagate to the child, this creates a
1678 // problem. We need to remove instrumentation code from the child. This routine
1681 // "this" represents an instReqNode in the PARENT process.
1682 // "map" maps all instInstance*'s of the parent process to instInstance*'s in the
1683 // child process. We modify "map" by setting a value to NULL.
1685 instInstance *parentInstance = getInstance();
1687 instInstance *childInstance;
1688 if (!map.find(parentInstance, childInstance)) // writes to childInstance
1691 vector<unsigned> pointsToCheck; // is it right leaving this empty on a fork()???
1692 deleteInst(childInstance, pointsToCheck);
1694 map[parentInstance] = NULL; // since we've deleted...
1696 return true; // success
// Inserts this request into 'theProc' through addInstFunc() and stores the
// resulting instInstance in 'instance'.
//   theProc     - process to instrument
//   retInstance - reference through which a returnInstance is handed back
//                 (presumably filled in by addInstFunc() -- confirm)
// Returns true iff addInstFunc() produced a non-NULL instance.
1699 bool instReqNode::insertInstrumentation(process *theProc,
1700 returnInstance *&retInstance)
1702 // NEW: We may manually trigger the instrumentation, via a call to postRPCtoDo()
1704 // addInstFunc() is one of the key routines in all paradynd.
1705 // It installs a base tramp at the point (if needed), generates code
1706 // for the tramp, calls inferiorMalloc() in the text heap to get space for it,
1707 // and actually inserts the instrumentation.
1708 instance = addInstFunc(theProc, point, ast, when, order,
1709 false, // false --> don't exclude cost
1712 return (instance != NULL);
// Removes this request's instrumentation by passing 'instance' and
// 'pointsToCheck' to deleteInst().
1715 void instReqNode::disable(const vector<unsigned> &pointsToCheck)
1717 deleteInst(instance, pointsToCheck);
// Destructor of instReqNode.
1721 instReqNode::~instReqNode()
// Estimates the cost of this request in 'theProc'.
// The unit cost in cycles is the sum of ast->cost(), getPointCost() for the point
// and the instruction costs of trampPreamble and trampTrailer. It is divided by
// cyclesPerSecond and multiplied by getPointFrequency(point).
// NOTE(review): unitCostInCycles is an int; if cyclesPerSecond is an integral
// type too, the division below truncates -- confirm its type.
1727 float instReqNode::cost(process *theProc) const
1732 int unitCostInCycles;
1734 unitCostInCycles = ast->cost() + getPointCost(theProc, point) +
1735 getInsnCost(trampPreamble) + getInsnCost(trampTrailer);
1736 // printf("unit cost = %d cycles\n", unitCostInCycles);
1737 unitCost = unitCostInCycles/ cyclesPerSecond;
1738 frequency = getPointFrequency(point);
1739 value = unitCost * frequency;
// Schedules this request's ast for execution in 'theProc' by posting it with
// postRPCtoDo(); the launch itself happens later (see the comment at the end).
// Precondition (asserted): manuallyTrigger is set.
1743 bool instReqNode::triggerNow(process *theProc) {
1744 assert(manuallyTrigger);
1746 theProc->postRPCtoDo(ast, false, // don't skip cost
1747 NULL, // no callback fn needed
1750 // the rpc will be launched with a call to launchRPCifAppropriate()
1751 // in the main loop (perfStream.C)
1756 /* ************************************************************************* */
1758 #ifndef SHM_SAMPLING
// Creates a counter request that is not yet inserted into any process.
//   iValue     - stored in initialValue
//   iCounterId - stored in theSampleId
1759 sampledIntCounterReqNode::sampledIntCounterReqNode(int iValue, int iCounterId) :
1761 theSampleId = iCounterId;
1762 initialValue = iValue;
1764 // The following fields are NULL until insertInstrumentation()
1768 #if defined(MT_THREAD)
// Fork constructor: builds the child-process counterpart of 'src'.
//   src        - the parent's counter request
//   iCounterId - sample id for the child's counter (stored in theSampleId)
//   map        - parent instInstance* -> child instInstance*; used to find the
//                child's 'sampler'
// counterPtr is copied unchanged from 'src', and an intCounter carrying the new
// id is written to the child's inferior heap.
// NOTE(review): temp.value is taken from this->initialValue, but no statement
// in this constructor assigns initialValue -- confirm it is set from
// src.initialValue before being read.
1773 sampledIntCounterReqNode::sampledIntCounterReqNode(const sampledIntCounterReqNode &src,
1775 metricDefinitionNode *,
1777 const dictionary_hash<instInstance*,instInstance*> &map) {
1778 // a dup() routine (call after a fork())
1779 counterPtr = src.counterPtr; // assumes addr spaces have been dup()d.
1781 if (!map.find(src.sampler, this->sampler)) // writes to this->sampler
1784 theSampleId = iCounterId;
1787 temp.id.id = this->theSampleId;
1788 temp.value = initialValue;
1789 writeToInferiorHeap(childProc, temp);
1791 #if defined(MT_THREAD)
// Allocates (with new) the child-process copy of this node by invoking the fork
// constructor with childProc, mi, iCounterId and map.
// Under MT_THREAD the counter's position is additionally duplicated from the
// parent's first thread into the child's first thread.
1797 sampledIntCounterReqNode::dup(process *childProc,
1798 metricDefinitionNode *mi,
1800 const dictionary_hash<instInstance*,instInstance*> &map
1802 // duplicate 'this' (allocate w/ new) and return. Call after a fork().
1804 sampledIntCounterReqNode *tmp;
1805 tmp = new sampledIntCounterReqNode(*this, childProc, mi, iCounterId, map);
1808 #if defined(MT_THREAD)
1809 // initialize position for new counter id with the same value as the
1810 // position for the "parent"
1812 process *parent = childProc->getParent();
1814 // NOTE: this needs to be done for every thread, not just for threads[0]
1815 Thread *thr = parent->threads[0];
1816 childProc->threads[0]->CTvector->dup(tmp->theSampleId, iCounterId, thr, tmp->position_);
// Allocates and initializes this counter in 'theProc' and arranges for it to be
// reported from DYNINSTsampleValues.
// Visible work:
//   - inferiorMalloc() of sizeof(intCounter) in dataHeap; NULL means failure (returns false);
//   - writes an intCounter holding theSampleId and initialValue to that location;
//   - looks up "DYNINSTsampleValues" (asserted to exist);
//   - builds an ast that calls "DYNINSTreportCounter" with counterPtr as a constant;
//   - adds it at the function's entry point (callPreInsn, orderLastAtPoint) and keeps
//     the resulting instInstance in 'sampler'.
// NOTE(review): 'sampler' is not visibly checked for NULL before success is returned -- confirm.
1822 bool sampledIntCounterReqNode::insertInstrumentation(process *theProc,
1823 metricDefinitionNode *) {
1824 // Remember counterPtr and sampler are NULL until this routine
1826 counterPtr = (intCounter*)inferiorMalloc(theProc, sizeof(intCounter), dataHeap);
1827 if (counterPtr == NULL)
1828 return false; // failure!
1830 // initialize the intCounter in the inferior heap
1832 temp.id.id = this->theSampleId;
1833 temp.value = this->initialValue;
1835 writeToInferiorHeap(theProc, temp);
1837 pdFunction *sampleFunction = theProc->findOneFunction("DYNINSTsampleValues");
1838 assert(sampleFunction);
1841 tmp = new AstNode(AstNode::Constant, counterPtr);
1842 ast = new AstNode("DYNINSTreportCounter", tmp);
1845 instPoint *func_entry = sampleFunction->funcEntry(theProc);
1846 sampler = addInstFunc(theProc, func_entry,
1847 ast, callPreInsn, orderLastAtPoint, false);
1850 #if defined(MT_THREAD)
1851 updateCounterTimerVectorMT(theProc,this->theSampleId,this->position_,getInferiorPtr());
1854 return true; // success
// Removes this counter from 'theProc'.
//   theProc       - process that holds the counter
//   pointsToCheck - passed on to inferiorFree()
// The sampler instrumentation is deleted when present, and the counter's storage
// in dataHeap is released (counterPtr is asserted non-NULL).
// Under MT_THREAD the counter is also removed from the first thread's CTvector.
1857 void sampledIntCounterReqNode::disable(process *theProc,
1858 const vector<unsigVecType> &pointsToCheck) {
1859 // We used to remove the sample id from midToMiMap here but now the caller is
1860 // responsible for that.
1862 // Remove instrumentation added to DYNINSTsampleValues(), if necessary:
1863 if (sampler != NULL)
1864 ::deleteInst(sampler, getAllTrampsAtPoint(sampler));
1866 // Deallocate space for intCounter in the inferior heap:
1867 assert(counterPtr != NULL);
// NOTE(review): the pointer is cast to unsigned; where pointers are wider than
// unsigned int this drops the upper bits of the address.
1868 inferiorFree(theProc, (unsigned)counterPtr, dataHeap, pointsToCheck);
1870 #if defined(MT_THREAD)
1871 Thread *thr = theProc->threads[0];
1872 thr->CTvector->remove(this->theSampleId, this->position_);
// Copies sizeof(intCounter) bytes from 'dataSrc' into theProc's data space at
// address counterPtr.
1876 void sampledIntCounterReqNode::writeToInferiorHeap(process *theProc,
1877 const intCounter &dataSrc) const {
1878 // using the contents of "dataSrc", write to the inferior heap at loc
1879 // "counterPtr" via proc->writeDataSpace()
1881 theProc->writeDataSpace(counterPtr, sizeof(intCounter), &dataSrc);
// Deletes, in the child process, the copy of this node's sampler instrumentation.
//   map - parent instInstance* -> child instInstance*; the entry for this node's
//         sampler is overwritten with NULL after the child instance is deleted
1884 bool sampledIntCounterReqNode::
1885 unFork(dictionary_hash<instInstance*,instInstance*> &map) {
1886 instInstance *parentSamplerInstance = this->sampler;
1888 instInstance *childSamplerInstance;
1889 if (!map.find(parentSamplerInstance, childSamplerInstance))
1892 vector<unsigned> pointsToCheck; // empty on purpose
1893 deleteInst(childSamplerInstance, pointsToCheck);
1895 map[parentSamplerInstance] = NULL;
1902 /* ************************************************************************* */
// Creates a shared-memory counter request that is not yet allocated in any process.
//   iValue     - stored in initialValue
//   iCounterId - stored in theSampleId
// allocatedIndex == UINT_MAX and inferiorCounterPtr == NULL mark "not allocated yet".
1906 sampledShmIntCounterReqNode::sampledShmIntCounterReqNode(int iValue, int iCounterId) :
1908 theSampleId = iCounterId;
1909 initialValue = iValue;
1911 // The following fields are NULL until insertInstrumentation()
1912 allocatedIndex = UINT_MAX;
1913 inferiorCounterPtr = NULL;
1915 #if defined(MT_THREAD)
// Fork constructor: builds the child-process counterpart of 'src'.
//   src       - the parent's counter request
//   childProc - the forked process; its int-counter heap is updated here
//   mi        - the child's metric instance; must already be registered in
//               midToMiMap under the new sample id (asserted)
// The heap index and initial value are inherited from 'src'; only the sample id
// is new. The raw counter in the child's heap is rewritten with the initial value
// and the new id, its spinner is copied from the parent's counter, and fresh
// house-keeping data is installed for the slot.
1920 sampledShmIntCounterReqNode::
1921 sampledShmIntCounterReqNode(const sampledShmIntCounterReqNode &src,
1922 process *childProc, metricDefinitionNode *mi,
1924 // a dup() routine (call after a fork())
1925 // Assumes that "childProc" has been copied already (e.g., the shm seg was copied).
1927 // Note that the index w/in the inferior heap remains the same, so setting the
1928 // new inferiorCounterPtr isn't too hard. Actually, it's trivial, since other code
1929 // ensures that the new shm segment is placed in exactly the same virtual mem location
1930 // as the previous one.
1932 // Note that the fastInferiorHeap class's fork ctor will have already copied the
1933 // actual data; we need to fill in new meta-data (new houseKeeping entries).
1935 this->allocatedIndex = src.allocatedIndex;
1937 this->theSampleId = iCounterId; // this is different from the parent's value
1938 this->initialValue = src.initialValue;
1940 fastInferiorHeap<intCounterHK, intCounter> &theHeap =
1941 childProc->getInferiorIntCounters();
1943 // since the new shm seg is placed in exactly the same memory location as the old
1944 // one, nothing here should change.
1945 intCounter *oldInferiorCounterPtr = src.inferiorCounterPtr;
1946 inferiorCounterPtr = theHeap.index2InferiorAddr(allocatedIndex);
1947 assert(inferiorCounterPtr == oldInferiorCounterPtr);
1949 // write to the raw item in the inferior heap:
1950 intCounter *localCounterPtr = theHeap.index2LocalAddr(allocatedIndex);
1951 const intCounter *localSrcCounterPtr = childProc->getParent()->getInferiorIntCounters().index2LocalAddr(allocatedIndex);
1952 localCounterPtr->value = initialValue;
1953 localCounterPtr->id.id = theSampleId;
1954 localCounterPtr->theSpinner = localSrcCounterPtr->theSpinner;
1955 // in case we're in the middle of an operation
1957 // write HK for this intCounter:
1958 // Note: we don't assert anything about mi->getMId(), because that id has no
1959 // relation to the ids we work with (theSampleId). In fact, we (the sampling code)
1960 // just don't ever care what mi->getMId() is.
1961 assert(theSampleId >= 0);
1962 assert(midToMiMap.defines(theSampleId));
1963 assert(midToMiMap[theSampleId] == mi);
1964 intCounterHK iHKValue(theSampleId, mi);
1965 // the mi differs from the mi of the parent; theSampleId differs too.
1966 theHeap.initializeHKAfterFork(allocatedIndex, iHKValue);
1968 #if defined(MT_THREAD)
// dup(): fork-time duplication of this shared-memory int counter request.
// Heap-allocates a new sampledShmIntCounterReqNode through the fork
// constructor, passing childProc, mi and iCounterId (the sample id the copy
// will carry). The instInstance map parameter is not referenced in the
// statements below.
1974 sampledShmIntCounterReqNode::dup(process *childProc,
1975 metricDefinitionNode *mi,
1977 const dictionary_hash<instInstance*,instInstance*> &
1979 // duplicate 'this' (allocate w/ new) and return. Call after a fork().
1981 sampledShmIntCounterReqNode *tmp;
1982 tmp = new sampledShmIntCounterReqNode(*this, childProc, mi, iCounterId);
1985 #if defined(MT_THREAD)
1986 // initialize position for new counter id with the same value as the
1987 // position for the "parent"
// Only thread 0 of the parent and of the child is consulted here.
1989 const process *parent = childProc->getParent();
1991 Thread *thr = parent->threads[0];
1992 childProc->threads[0]->CTvector->dup(tmp->theSampleId, iCounterId, thr, tmp->position_);
// Allocates this counter in theProc's shared-memory int-counter heap.
// The raw intCounter is seeded with theSampleId and initialValue, and the
// housekeeping entry (intCounterHK) pairs theSampleId with iMi.
// Returns false when the heap's alloc() fails, true otherwise.
1998 bool sampledShmIntCounterReqNode::insertInstrumentation(process *theProc,
1999 metricDefinitionNode *iMi) {
2000 // Remember counterPtr is NULL until this routine gets called.
2001 // WARNING: there will be an assert failure if the applic hasn't yet attached to the
2004 // initialize the intCounter in the inferior heap
2006 iValue.id.id = this->theSampleId;
2007 iValue.value = this->initialValue; // what about initializing 'theSpinner'???
2009 intCounterHK iHKValue(this->theSampleId, iMi);
2011 fastInferiorHeap<intCounterHK, intCounter> &theShmHeap =
2012 theProc->getInferiorIntCounters();
// alloc() receives allocatedIndex as its third argument; presumably it stores
// the chosen slot index there -- confirm against fastInferiorHeap.
2014 if (!theShmHeap.alloc(iValue, iHKValue, this->allocatedIndex))
2015 return false; // failure
2017 inferiorCounterPtr = theShmHeap.getBaseAddrInApplic() + allocatedIndex;
2018 // ptr arith. Now we know where in the inferior heap this counter is
2019 // attached to, so getInferiorPtr() can work ok.
2021 assert(inferiorCounterPtr == theShmHeap.index2InferiorAddr(allocatedIndex));
2022 // just a check for fun
2024 #if defined(MT_THREAD)
// Register the new counter's inferior address in the counter/timer vector.
2025 updateCounterTimerVectorMT(theProc,this->theSampleId,this->position_,getInferiorPtr());
2028 return true; // success
// Releases this counter's slot in theProc's shared-memory int-counter heap.
// pointsToCheck is a vector of vectors; it is flattened into one list that
// is handed to makePendingFree() together with allocatedIndex.
2031 void sampledShmIntCounterReqNode::disable(process *theProc,
2032 const vector<unsigVecType> &pointsToCheck) {
2033 // We used to remove the sample id from midToMiMap here but now the caller is
2034 // responsible for that.
2036 fastInferiorHeap<intCounterHK, intCounter> &theShmHeap =
2037 theProc->getInferiorIntCounters();
2039 // Remove from inferior heap; make sure we won't be sampled any more:
2040 vector<unsigned> trampsMaybeUsing;
// Flatten: every element of every inner vector goes into trampsMaybeUsing
// (operator+= presumably appends -- confirm against the vector class).
2041 for (unsigned pointlcv=0; pointlcv < pointsToCheck.size(); pointlcv++)
2042 for (unsigned tramplcv=0; tramplcv < pointsToCheck[pointlcv].size(); tramplcv++)
2043 trampsMaybeUsing += pointsToCheck[pointlcv][tramplcv];
2045 theShmHeap.makePendingFree(allocatedIndex, trampsMaybeUsing);
2047 #if defined(MT_THREAD)
// Drop this sample id / position from thread 0's counter/timer vector.
2048 Thread *thr = theProc->threads[0];
2049 thr->CTvector->remove(this->theSampleId, this->position_);
2055 /* ************************************************************************* */
// Constructor: records the sample id (iCounterId) and the value the counter
// should start with (iValue). Nothing is allocated in the inferior process
// by the statements shown here.
2057 nonSampledIntCounterReqNode::nonSampledIntCounterReqNode(int iValue, int iCounterId) :
2059 theSampleId = iCounterId;
2060 initialValue = iValue;
2062 // The following fields are NULL until insertInstrumentation()
2065 #if defined(MT_THREAD)
// Fork constructor: builds the child's copy of src.
// Keeps the same inferior address (counterPtr) and initial value as src,
// adopts iCounterId as the new sample id, then rewrites the intCounter in
// childProc so that it carries the new id and the initial value.
// The metricDefinitionNode parameter is unnamed and therefore unused.
2070 nonSampledIntCounterReqNode::
2071 nonSampledIntCounterReqNode(const nonSampledIntCounterReqNode &src,
2072 process *childProc, metricDefinitionNode *,
2074 // a dup() routine (call after a fork())
2075 counterPtr = src.counterPtr; // assumes addr spaces have been dup()d.
2076 initialValue = src.initialValue;
2077 theSampleId = iCounterId;
// Populate a local intCounter image and write it into the child.
2080 temp.id.id = this->theSampleId;
2081 temp.value = this->initialValue;
2082 writeToInferiorHeap(childProc, temp);
2084 #if defined(MT_THREAD)
// dup(): fork-time duplication of this non-sampled int counter request.
// Heap-allocates a new nonSampledIntCounterReqNode through the fork
// constructor, passing childProc, mi and iCounterId. The instInstance map
// parameter is not referenced in the statements below.
2090 nonSampledIntCounterReqNode::dup(process *childProc,
2091 metricDefinitionNode *mi,
2093 const dictionary_hash<instInstance*,instInstance*> &
2095 // duplicate 'this' (allocate w/ new) and return. Call after a fork().
2097 nonSampledIntCounterReqNode *tmp;
2098 tmp = new nonSampledIntCounterReqNode(*this, childProc, mi, iCounterId);
2101 #if defined(MT_THREAD)
2102 // initialize position for new counter id with the same value as the
2103 // position for the "parent"
// Only thread 0 of the parent and of the child is consulted here.
2105 const process *parent = childProc->getParent();
2107 Thread *thr = parent->threads[0];
2108 childProc->threads[0]->CTvector->dup(tmp->theSampleId, iCounterId, thr, tmp->position_);
// Allocates sizeof(intCounter) bytes in theProc via inferiorMalloc() (heap
// selector: dataHeap) and initializes that storage with theSampleId and
// initialValue. Returns false when the allocation yields NULL, true
// otherwise. The metricDefinitionNode parameter is unnamed and unused.
2114 bool nonSampledIntCounterReqNode::insertInstrumentation(process *theProc,
2115 metricDefinitionNode *) {
2116 // Remember counterPtr is NULL until this routine gets called.
2117 counterPtr = (intCounter*)inferiorMalloc(theProc, sizeof(intCounter), dataHeap);
2118 if (counterPtr == NULL)
2119 return false; // failure!
2121 // initialize the intCounter in the inferior heap
2123 temp.id.id = this->theSampleId;
2124 temp.value = this->initialValue;
2126 writeToInferiorHeap(theProc, temp);
2128 #if defined(MT_THREAD)
// Register the new counter's inferior address in the counter/timer vector.
2129 updateCounterTimerVectorMT(theProc,this->theSampleId,this->position_,getInferiorPtr());
2132 return true; // success
// Frees the intCounter previously allocated in theProc.
// Precondition (asserted): counterPtr is non-NULL, i.e. the counter has
// been given an inferior address. pointsToCheck is forwarded unchanged to
// inferiorFree().
2135 void nonSampledIntCounterReqNode::disable(process *theProc,
2136 const vector<unsigVecType> &pointsToCheck) {
2137 // We used to remove the sample id from midToMiMap here but now the caller is
2138 // responsible for that.
2140 // Deallocate space for intCounter in the inferior heap:
2141 assert(counterPtr != NULL);
// NOTE(review): the pointer is narrowed to `unsigned` here; this presumes
// inferior addresses fit in an unsigned int -- confirm for the target ABI.
2142 inferiorFree(theProc, (unsigned)counterPtr, dataHeap, pointsToCheck);
2144 #if defined(MT_THREAD)
// Drop this sample id / position from thread 0's counter/timer vector.
2145 Thread *thr = theProc->threads[0];
2146 thr->CTvector->remove(this->theSampleId, this->position_);
// Copies one intCounter image (dataSrc) into theProc at address counterPtr.
// Exactly sizeof(intCounter) bytes are written through writeDataSpace().
// The call's result, if any, is not examined here.
2150 void nonSampledIntCounterReqNode::writeToInferiorHeap(process *theProc,
2151 const intCounter &dataSrc) const {
2152 // using the contents of "dataSrc", write to the inferior heap at loc
2153 // "counterPtr" via proc->writeDataSpace()
2155 theProc->writeDataSpace(counterPtr, sizeof(intCounter), &dataSrc);
2158 /* ****************************************************************** */
2160 #ifndef SHM_SAMPLING
// Constructor: records the sample id (iCounterId) and the timer type
// (iType). Nothing is allocated in the inferior process by the statements
// shown here.
2161 sampledTimerReqNode::sampledTimerReqNode(timerType iType, int iCounterId) :
2163 theSampleId = iCounterId;
2164 theTimerType = iType;
2166 // The following fields are NULL until insertInstrumentation():
2170 #if defined(MT_THREAD)
// Fork constructor: builds the child's copy of src.
// Keeps src's inferior timer address, looks up the child's sampler
// instInstance in `map` using the parent's sampler as the key, adopts
// iCounterId as the new sample id, and writes a freshly zeroed tTimer
// (new id, same type, normalize = 1000000) into childProc.
// The metricDefinitionNode parameter is unnamed and unused.
2175 sampledTimerReqNode::sampledTimerReqNode(const sampledTimerReqNode &src,
2177 metricDefinitionNode *,
2179 const dictionary_hash<instInstance*,instInstance*> &map) {
2180 // a dup()-like routine; call after a fork()
2181 timerPtr = src.timerPtr; // assumes addr spaces have been dup()'d
2183 if (!map.find(src.sampler, this->sampler)) // writes to this->sampler
2186 assert(sampler); // makes sense; timers are always sampled, whereas intCounters
2187 // might be just non-sampled predicates.
2189 theSampleId = iCounterId;
2190 theTimerType = src.theTimerType;
// Build a zero-filled tTimer image, fill in id/type/normalize, and write
// it over the child's copy of the timer.
2193 P_memset(&temp, '\0', sizeof(tTimer)); /* is this needed? */
2194 temp.id.id = this->theSampleId;
2195 temp.type = this->theTimerType;
2196 temp.normalize = 1000000;
2197 writeToInferiorHeap(childProc, temp);
2199 #if defined(MT_THREAD)
2203 // WARNING: shouldn't we be resetting the raw value to count=0, start=0,
2204 // total = src.initialValue ??? On the other hand, it's not that
2205 // simple -- if the timer is active in the parent, then it'll be active
2206 // in the child. So how about setting count to src.count, start=now,
// dup(): fork-time duplication of this sampled timer request.
// Heap-allocates a new sampledTimerReqNode through the fork constructor,
// forwarding childProc, mi, iCounterId and the parent-to-child instInstance
// map. A NULL return signals failure (see the comment on the return below).
2211 sampledTimerReqNode::dup(process *childProc, metricDefinitionNode *mi,
2213 const dictionary_hash<instInstance*,instInstance*> &map
2215 // duplicate 'this' (allocate w/ new) and return. Call after a fork().
2217 sampledTimerReqNode *result = new sampledTimerReqNode(*this, childProc, mi, iCounterId, map);
2220 return NULL; // on failure, return w/o incrementing counterId
2222 #if defined(MT_THREAD)
2223 // initialize position for new counter id with the same value as the
2224 // position for the "parent"
// Only thread 0 of the parent and of the child is consulted here.
2226 const process *parent = childProc->getParent();
2228 Thread *thr = parent->threads[0];
2229 childProc->threads[0]->CTvector->dup(result->theSampleId, iCounterId, thr, result->position_);
// Allocates a tTimer in theProc (inferiorMalloc, dataHeap), initializes it,
// and instruments the entry of DYNINSTsampleValues with a call to
// DYNINSTreportTimer that receives the timer's inferior address.
// Returns false when the allocation yields NULL, true otherwise.
// The metricDefinitionNode parameter is unnamed and unused.
2235 bool sampledTimerReqNode::insertInstrumentation(process *theProc,
2236 metricDefinitionNode *) {
2237 timerPtr = (tTimer *)inferiorMalloc(theProc, sizeof(tTimer), dataHeap);
2238 if (timerPtr == NULL)
2239 return false; // failure!
2241 // Now let's initialize the newly allocated tTimer in the inferior heap:
2243 P_memset(&temp, '\0', sizeof(tTimer));
2244 temp.id.id = this->theSampleId;
2245 temp.type = this->theTimerType;
2246 temp.normalize = 1000000;
2247 writeToInferiorHeap(theProc, temp);
2249 // Now instrument DYNINSTsampleValues with a call to DYNINSTreportTimer:
2250 pdFunction *sampleFunction = theProc->findOneFunction("DYNINSTsampleValues");
2251 assert(sampleFunction);
// AST: DYNINSTreportTimer(<constant timerPtr>)
2254 tmp = new AstNode(AstNode::Constant, timerPtr);
2255 ast = new AstNode("DYNINSTreportTimer", tmp);
// Insert at the function's entry point; the resulting instInstance is kept
// in `sampler` so that disable() can remove it later.
2258 instPoint *func_entry = sampleFunction->funcEntry(theProc);
2259 sampler = addInstFunc(theProc, func_entry, ast,
2260 callPreInsn, orderLastAtPoint, false);
2263 #if defined(MT_THREAD)
// Register the new timer's inferior address in the counter/timer vector.
2264 updateCounterTimerVectorMT(theProc,this->theSampleId,this->position_,getInferiorPtr());
2267 return true; // successful
// Undoes insertInstrumentation(): removes the sampler instrumentation (when
// one is recorded) and frees the tTimer allocated in theProc.
// pointsToCheck is forwarded unchanged to inferiorFree().
2270 void sampledTimerReqNode::disable(process *theProc,
2271 const vector<unsigVecType> &pointsToCheck) {
2272 // We used to remove the sample id from midToMiMap here but now the caller is
2273 // responsible for that.
2275 // Remove instrumentation added to DYNINSTsampleValues(), if necessary:
2276 if (sampler != NULL)
2277 ::deleteInst(sampler, getAllTrampsAtPoint(sampler));
2279 // Deallocate space for tTimer in the inferior heap:
// NOTE(review): the pointer is narrowed to `unsigned` here; this presumes
// inferior addresses fit in an unsigned int -- confirm for the target ABI.
2281 inferiorFree(theProc, (unsigned)timerPtr, dataHeap, pointsToCheck);
2283 #if defined(MT_THREAD)
// Drop this sample id / position from thread 0's counter/timer vector.
2284 Thread *thr = theProc->threads[0];
2285 thr->CTvector->remove(this->theSampleId, this->position_);
// Copies one tTimer image (dataSrc) into theProc at address timerPtr.
// Exactly sizeof(tTimer) bytes are written through writeDataSpace().
// The call's result, if any, is not examined here.
2289 void sampledTimerReqNode::writeToInferiorHeap(process *theProc,
2290 const tTimer &dataSrc) const {
2291 // using contents of "dataSrc", a local copy of the data,
2292 // write to inferior heap at loc "timerPtr" via proc->writeDataSpace()
2294 theProc->writeDataSpace(timerPtr, sizeof(tTimer), &dataSrc);
// unFork(): removes the child-side counterpart of this node's sampler.
// `map` is keyed by the parent's instInstance and yields the child's; after
// the child's instance is deleted its map entry is overwritten with NULL.
2297 bool sampledTimerReqNode::
2298 unFork(dictionary_hash<instInstance*,instInstance*> &map) {
2299 instInstance *parentSamplerInstance = sampler;
2301 instInstance *childSamplerInstance;
2302 if (!map.find(parentSamplerInstance, childSamplerInstance))
// deleteInst() is given an empty list of points to check.
2305 vector<unsigned> pointsToCheck; // empty
2306 deleteInst(childSamplerInstance, pointsToCheck);
2308 map[parentSamplerInstance] = NULL; // since we've deleted...
2315 /* ****************************************************************** */
// Constructor: records the sample id (iCounterId). allocatedIndex is set to
// UINT_MAX and inferiorTimerPtr to NULL as "not yet allocated" placeholders.
2318 sampledShmWallTimerReqNode::sampledShmWallTimerReqNode(int iCounterId) :
2320 theSampleId = iCounterId;
2322 // The following fields are NULL until insertInstrumentation():
2323 allocatedIndex = UINT_MAX;
2324 inferiorTimerPtr = NULL;
// Fork constructor: builds the child's copy of a shared-memory wall timer.
// The heap index is inherited from src; the sample id is replaced by
// iCounterId (asserted to differ from the parent's). The raw tTimer in the
// child's heap is then reset relative to the parent's current state, and a
// new housekeeping entry is installed for (theSampleId, mi).
2327 sampledShmWallTimerReqNode::
2328 sampledShmWallTimerReqNode(const sampledShmWallTimerReqNode &src,
2330 metricDefinitionNode *mi,
2332 // a dup()-like routine; call after a fork().
2333 // Assumes that the "childProc" has been duplicated already
2335 // Note that the index w/in the inferior heap remains the same, so setting the new
2336 // inferiorTimerPtr isn't too hard. Actually, it's trivial, since other code
2337 // ensures that the new shm segment is placed in exactly the same virtual mem loc
2338 // as the previous one.
2340 // Note that the fastInferiorHeap class's fork ctor will have already copied the
2341 // actual data; we need to fill in new meta-data (new houseKeeping entries).
2343 allocatedIndex = src.allocatedIndex;
2344 theSampleId = iCounterId;
2345 assert(theSampleId != src.theSampleId);
2347 fastInferiorHeap<wallTimerHK, tTimer> &theHeap =
2348 childProc->getInferiorWallTimers();
2350 // since the new shm seg is placed in exactly the same memory location as the old
2351 // one, nothing here should change.
2352 tTimer *oldInferiorTimerPtr = src.inferiorTimerPtr;
2353 inferiorTimerPtr = theHeap.index2InferiorAddr(allocatedIndex);
2354 assert(inferiorTimerPtr == oldInferiorTimerPtr);
2356 // Write new raw value in the inferior heap:
2357 // we set localTimerPtr as follows: protector1 and protector2 should be copied from
2358 // src. total should be reset to 0. start should be set to now if active else 0.
2359 // counter should be copied from the source.
2360 // NOTE: SINCE WE COPY FROM THE SOURCE, IT'S IMPORTANT THAT ON A FORK, BOTH THE
2361 // PARENT AND CHILD ARE PAUSED UNTIL WE COPY THINGS OVER. THAT THE CHILD IS
2362 // PAUSED IS NOTHING NEW; THAT THE PARENT SHOULD BE PAUSED IS NEW NEWS!
// localTimerPtr: the child's timer as seen through the child's heap object;
// srcTimerPtr:   the parent's timer at the same index, read-only.
2364 tTimer *localTimerPtr = theHeap.index2LocalAddr(allocatedIndex);
2365 const tTimer *srcTimerPtr = childProc->getParent()->getInferiorWallTimers().index2LocalAddr(allocatedIndex);
2367 localTimerPtr->total = 0;
2368 localTimerPtr->counter = srcTimerPtr->counter;
2369 localTimerPtr->id.id = theSampleId;
2370 localTimerPtr->protector1 = srcTimerPtr->protector1;
2371 localTimerPtr->protector2 = srcTimerPtr->protector2;
// A counter of zero is treated as "timer not running".
2373 if (localTimerPtr->counter == 0)
2374 // inactive timer...this is the easy case to copy
2375 localTimerPtr->start = 0; // undefined, really
2377 // active timer...don't copy the start time from the source...make it 'now'
2378 localTimerPtr->start = getCurrWallTime();
2380 // write new HK for this tTimer:
2381 // Note: we don't assert anything about mi->getMId(), because that id has no
2382 // relation to the ids we work with (theSampleId). In fact, we (the sampling code)
2383 // just don't ever care what mi->getMId() is.
// Sanity checks: the new sample id must already be registered in midToMiMap
// and must map to mi.
2384 assert(theSampleId >= 0);
2385 assert(midToMiMap.defines(theSampleId));
2386 assert(midToMiMap[theSampleId] == mi);
2387 wallTimerHK iHKValue(theSampleId, mi, 0); // is last param right?
2388 // the mi should differ from the mi of the parent; theSampleId differs too.
2389 theHeap.initializeHKAfterFork(allocatedIndex, iHKValue);
2391 #if defined(MT_THREAD)
// dup(): fork-time duplication of this shared-memory wall timer request.
// Heap-allocates a new sampledShmWallTimerReqNode through the fork
// constructor, passing childProc, mi and iCounterId. The instInstance map
// parameter is not referenced in the statements below.
2397 sampledShmWallTimerReqNode::dup(process *childProc,
2398 metricDefinitionNode *mi,
2400 const dictionary_hash<instInstance*,instInstance*> &
2402 // duplicate 'this' (allocate w/ new) and return. Call after a fork().
2404 sampledShmWallTimerReqNode *tmp;
2405 tmp = new sampledShmWallTimerReqNode(*this, childProc, mi, iCounterId);
2408 #if defined(MT_THREAD)
2409 // initialize position for new counter id with the same value as the
2410 // position for the "parent"
// Only thread 0 of the parent and of the child is consulted here.
2412 const process *parent = childProc->getParent();
2414 Thread *thr = parent->threads[0];
2415 childProc->threads[0]->CTvector->dup(tmp->theSampleId, iCounterId, thr, tmp->position_);
// Allocates this timer in theProc's shared-memory wall-timer heap.
// The raw tTimer is zero-filled except for its id (theSampleId); the
// housekeeping entry (wallTimerHK) pairs theSampleId with iMi.
// Returns false when the heap's alloc() fails.
2421 bool sampledShmWallTimerReqNode::insertInstrumentation(process *theProc,
2422 metricDefinitionNode *iMi) {
2423 // Remember inferiorTimerPtr is NULL until this routine gets called.
2424 // WARNING: there will be an assert failure if the applic hasn't yet attached to the
2427 // initialize the tTimer in the inferior heap
2429 P_memset(&iValue, '\0', sizeof(tTimer));
2430 iValue.id.id = this->theSampleId;
2432 wallTimerHK iHKValue(this->theSampleId, iMi, 0);
2434 fastInferiorHeap<wallTimerHK, tTimer> &theShmHeap =
2435 theProc->getInferiorWallTimers();
// alloc() receives allocatedIndex as its third argument; presumably it stores
// the chosen slot index there -- confirm against fastInferiorHeap.
2437 if (!theShmHeap.alloc(iValue, iHKValue, this->allocatedIndex))
2438 return false; // failure
2440 inferiorTimerPtr = theShmHeap.getBaseAddrInApplic() + allocatedIndex;
2441 // ptr arith. Now we know where in the inferior heap this counter is
2442 // attached to, so getInferiorPtr() can work ok.
2444 assert(inferiorTimerPtr == theShmHeap.index2InferiorAddr(allocatedIndex));
2445 // just a check for fun
2447 #if defined(MT_THREAD)
// Register the new timer's inferior address in the counter/timer vector.
2448 updateCounterTimerVectorMT(theProc,this->theSampleId,this->position_,getInferiorPtr());
// Releases this timer's slot in theProc's shared-memory wall-timer heap.
// pointsToCheck is a vector of vectors; it is flattened into one list that
// is handed to makePendingFree() together with allocatedIndex.
2454 void sampledShmWallTimerReqNode::disable(process *theProc,
2455 const vector<unsigVecType> &pointsToCheck) {
2456 // We used to remove the sample id from midToMiMap here but now the caller is
2457 // responsible for that.
2459 fastInferiorHeap<wallTimerHK, tTimer> &theShmHeap = theProc->getInferiorWallTimers();
2461 // Remove from inferior heap; make sure we won't be sampled any more:
2462 vector<unsigned> trampsMaybeUsing;
// Flatten: every element of every inner vector goes into trampsMaybeUsing
// (operator+= presumably appends -- confirm against the vector class).
2463 for (unsigned pointlcv=0; pointlcv < pointsToCheck.size(); pointlcv++)
2464 for (unsigned tramplcv=0; tramplcv < pointsToCheck[pointlcv].size(); tramplcv++)
2465 trampsMaybeUsing += pointsToCheck[pointlcv][tramplcv];
2467 theShmHeap.makePendingFree(allocatedIndex, trampsMaybeUsing);
2469 #if defined(MT_THREAD)
// Drop this sample id / position from thread 0's counter/timer vector.
2470 Thread *thr = theProc->threads[0];
2471 thr->CTvector->remove(this->theSampleId, this->position_);
2475 /* ****************************************************************** */
// Constructor: records the sample id (iCounterId). allocatedIndex is set to
// UINT_MAX and inferiorTimerPtr to NULL as "not yet allocated" placeholders.
2477 sampledShmProcTimerReqNode::sampledShmProcTimerReqNode(int iCounterId) :
2479 theSampleId = iCounterId;
2481 // The following fields are NULL until insertInstrumentation():
2482 allocatedIndex = UINT_MAX;
2483 inferiorTimerPtr = NULL;
2485 #if defined(MT_THREAD)
// Fork constructor: builds the child's copy of a shared-memory process timer.
// The heap index is inherited from src; the sample id is replaced by
// iCounterId (asserted to differ from the parent's). The raw tTimer in the
// child's heap is then reset relative to the parent's current state, and a
// new housekeeping entry is installed for (theSampleId, mi).
2490 sampledShmProcTimerReqNode::
2491 sampledShmProcTimerReqNode(const sampledShmProcTimerReqNode &src,
2493 metricDefinitionNode *mi,
2495 // a dup()-like routine; call after a fork()
2496 // Assumes that the "childProc" has been duplicated already
2498 // Note that the index w/in the inferior heap remains the same, so setting the new
2499 // inferiorTimerPtr isn't too hard. Actually, it's trivial, since other code
2500 // ensures that the new shm segment is placed in exactly the same virtual mem loc
2501 // as the previous one.
2503 // Note that the fastInferiorHeap class's fork ctor will have already copied the
2504 // actual data; we need to fill in new meta-data (new houseKeeping entries).
2506 allocatedIndex = src.allocatedIndex;
2507 theSampleId = iCounterId;
2508 assert(theSampleId != src.theSampleId);
2510 fastInferiorHeap<processTimerHK, tTimer> &theHeap =
2511 childProc->getInferiorProcessTimers();
2513 // since the new shm seg is placed in exactly the same memory location as the old
2514 // one, nothing here should change.
2515 tTimer *oldInferiorTimerPtr = src.inferiorTimerPtr;
2516 inferiorTimerPtr = theHeap.index2InferiorAddr(allocatedIndex);
2517 assert(inferiorTimerPtr == oldInferiorTimerPtr);
2519 // Write new raw value:
2520 // we set localTimerPtr as follows: protector1 and protector2 should be copied from
2521 // src. total should be reset to 0. start should be set to now if active else 0.
2522 // counter should be copied from the source.
2523 // NOTE: SINCE WE COPY FROM THE SOURCE, IT'S IMPORTANT THAT ON A FORK, BOTH THE
2524 // PARENT AND CHILD ARE PAUSED UNTIL WE COPY THINGS OVER. THAT THE CHILD IS
2525 // PAUSED IS NOTHING NEW; THAT THE PARENT SHOULD BE PAUSED IS NEW NEWS!
// localTimerPtr: the child's timer as seen through the child's heap object;
// srcTimerPtr:   the parent's timer at the same index, read-only.
2527 tTimer *localTimerPtr = theHeap.index2LocalAddr(allocatedIndex);
2528 const tTimer *srcTimerPtr = childProc->getParent()->getInferiorProcessTimers().index2LocalAddr(allocatedIndex);
2530 localTimerPtr->total = 0;
2531 localTimerPtr->counter = srcTimerPtr->counter;
2532 localTimerPtr->id.id = theSampleId;
2533 localTimerPtr->protector1 = srcTimerPtr->protector1;
2534 localTimerPtr->protector2 = srcTimerPtr->protector2;
// A counter of zero is treated as "timer not running".
2536 if (localTimerPtr->counter == 0)
2537 // inactive timer...this is the easy case to copy
2538 localTimerPtr->start = 0; // undefined, really
2540 // active timer...don't copy the start time from the source...make it 'now'
// Unlike the wall-timer variant, "now" is the child's process CPU time.
2541 localTimerPtr->start = childProc->getInferiorProcessCPUtime();
2543 // Write new HK for this tTimer:
2544 // Note: we don't assert anything about mi->getMId(), because that id has no
2545 // relation to the ids we work with (theSampleId). In fact, we (the sampling code)
2546 // just don't ever care what mi->getMId() is.
// Sanity checks: the new sample id must already be registered in midToMiMap
// and must map to mi.
2547 assert(theSampleId >= 0);
2548 assert(midToMiMap.defines(theSampleId));
2549 assert(midToMiMap[theSampleId] == mi);
2550 processTimerHK iHKValue(theSampleId, mi, 0); // is last param right?
2551 // the mi differs from the mi of the parent; theSampleId differs too.
2552 theHeap.initializeHKAfterFork(allocatedIndex, iHKValue);
2554 #if defined(MT_THREAD)
// dup(): fork-time duplication of this shared-memory process timer request.
// Heap-allocates a new sampledShmProcTimerReqNode through the fork
// constructor, passing childProc, mi and iCounterId. The instInstance map
// parameter is not referenced in the statements below.
2560 sampledShmProcTimerReqNode::dup(process *childProc,
2561 metricDefinitionNode *mi,
2563 const dictionary_hash<instInstance*,instInstance*> &
2565 // duplicate 'this' (allocate w/ new) and return. Call after a fork().
2567 sampledShmProcTimerReqNode *tmp;
2568 tmp = new sampledShmProcTimerReqNode(*this, childProc, mi, iCounterId);
2571 #if defined(MT_THREAD)
2572 // initialize position for new counter id with the same value as the
2573 // position for the "parent"
// Only thread 0 of the parent and of the child is consulted here.
2575 const process *parent = childProc->getParent();
2577 Thread *thr = parent->threads[0];
2578 childProc->threads[0]->CTvector->dup(tmp->theSampleId, iCounterId, thr, tmp->position_);
// Allocates this timer in theProc's shared-memory process-timer heap.
// The raw tTimer is zero-filled except for its id (theSampleId); the
// housekeeping entry (processTimerHK) pairs theSampleId with iMi.
// Returns false when the heap's alloc() fails.
2584 bool sampledShmProcTimerReqNode::insertInstrumentation(process *theProc,
2585 metricDefinitionNode *iMi) {
2586 // Remember inferiorTimerPtr is NULL until this routine gets called.
2587 // WARNING: there will be an assert failure if the applic hasn't yet attached to the
2590 // initialize the tTimer in the inferior heap
2592 P_memset(&iValue, '\0', sizeof(tTimer));
2593 iValue.id.id = this->theSampleId;
2595 processTimerHK iHKValue(this->theSampleId, iMi, 0);
2597 fastInferiorHeap<processTimerHK, tTimer> &theShmHeap =
2598 theProc->getInferiorProcessTimers();
// alloc() receives allocatedIndex as its third argument; presumably it stores
// the chosen slot index there -- confirm against fastInferiorHeap.
2600 if (!theShmHeap.alloc(iValue, iHKValue, this->allocatedIndex))
2601 return false; // failure
2603 inferiorTimerPtr = theShmHeap.getBaseAddrInApplic() + allocatedIndex;
2604 // ptr arith. Now we know where in the inferior heap this counter is
2605 // attached to, so getInferiorPtr() can work ok.
2607 assert(inferiorTimerPtr == theShmHeap.index2InferiorAddr(allocatedIndex));
2608 // just a check for fun
2610 #if defined(MT_THREAD)
// Register the new timer's inferior address in the counter/timer vector.
2611 updateCounterTimerVectorMT(theProc,this->theSampleId,this->position_,getInferiorPtr());
// Releases this timer's slot in theProc's shared-memory process-timer heap.
// pointsToCheck is a vector of vectors; it is flattened into one list that
// is handed to makePendingFree() together with allocatedIndex.
2617 void sampledShmProcTimerReqNode::disable(process *theProc,
2618 const vector<unsigVecType> &pointsToCheck) {
2619 // We used to remove the sample id from midToMiMap here but now the caller is
2620 // responsible for that.
2622 fastInferiorHeap<processTimerHK, tTimer> &theShmHeap =
2623 theProc->getInferiorProcessTimers();
2625 // Remove from inferior heap; make sure we won't be sampled any more:
2626 vector<unsigned> trampsMaybeUsing;
// Flatten: every element of every inner vector goes into trampsMaybeUsing
// (operator+= presumably appends -- confirm against the vector class).
2627 for (unsigned pointlcv=0; pointlcv < pointsToCheck.size(); pointlcv++)
2628 for (unsigned tramplcv=0; tramplcv < pointsToCheck[pointlcv].size(); tramplcv++)
2629 trampsMaybeUsing += pointsToCheck[pointlcv][tramplcv];
2631 theShmHeap.makePendingFree(allocatedIndex, trampsMaybeUsing);
2633 #if defined(MT_THREAD)
// Drop this sample id / position from thread 0's counter/timer vector.
2634 Thread *thr = theProc->threads[0];
2635 thr->CTvector->remove(this->theSampleId, this->position_);
2640 /* **************************** */
// Reports one sample for every enabled instance of every internal metric.
// Nothing is reported while the application is paused. Unless `force` is
// true, a sample is only taken once `samplingRate` has elapsed since the
// stored `end` time. For the metrics matched by name below the value is the
// interval length (end - start) multiplied by a current quantity; other
// metrics are handled according to their style().
2642 void reportInternalMetrics(bool force)
2644 if (isApplicationPaused())
2645 return; // we don't sample when paused (is this right?)
// `end` is static, so it keeps its value from one call to the next.
2647 static timeStamp end=0.0;
2649 // see if we have a sample to establish time base.
2650 if (!firstRecordTime) {
2651 cerr << "reportInternalMetrics: no because firstRecordTime==0" << endl;
// firstRecordTime is divided by MILLION to obtain the time base
// (presumably microseconds to seconds -- confirm).
2656 end = (timeStamp)firstRecordTime/MILLION;
2658 const timeStamp now = getCurrentTime(false);
2660 // check if it is time for a sample
2661 if (!force && now < end + samplingRate) {
2662 // cerr << "reportInternalMetrics: no because now < end + samplingRate (end=" << end << "; samplingRate=" << samplingRate << "; now=" << now << ")" << endl;
2663 // cerr << "difference is " << (end+samplingRate-now) << endl;
// The new interval starts where the stored `end` left off.
// NOTE(review): (end - start) below is only non-zero if `end` is advanced
// after this point -- confirm.
2667 timeStamp start = end;
2670 // TODO -- clean me up, please
2672 unsigned ai_size = internalMetric::allInternalMetrics.size();
2673 for (unsigned u=0; u<ai_size; u++) {
2674 internalMetric *theIMetric = internalMetric::allInternalMetrics[u];
2675 // Loop thru all enabled instances of this internal metric...
2677 for (unsigned v=0; v < theIMetric->num_enabled_instances(); v++) {
2678 internalMetric::eachInstance &theInstance = theIMetric->getEnabledInstance(v);
2679 // not "const" since bumpCumulativeValueBy() may be called
// Dispatch on the metric's name first, then on its style.
2681 sampleValue value = 0;
2682 if (theIMetric->name() == "active_processes") {
2683 //value = (end - start) * activeProcesses;
2684 value = (end - start) * theInstance.getValue();
2685 } else if (theIMetric->name() == "bucket_width") {
2686 //value = (end - start)* theInstance.getValue();
2687 // I would prefer to use (end-start) * theInstance.getValue(); however,
2688 // we've had some problems getting setValue() called in time, thus
2689 // leaving us with getValues() of 0 sometimes. See longer comment in dynrpc.C --ari
2690 extern float currSamplingRate;
2691 value = (end - start) * currSamplingRate;
2692 } else if (theIMetric->name() == "number_of_cpus") {
2693 value = (end - start) * numberOfCPUs;
2694 } else if (theIMetric->name() == "total_CT") {
2695 value = (end - start) * internalMetricCounterId;
2697 } else if (theIMetric->name() == "active_CT") {
2698 value = (end - start) * activeCT;
2700 } else if (theIMetric->name() == "mem_CT") {
2701 value = (end - start) * memCT;
2703 } else if (theIMetric->name() == "infHeapMemAvailable") {
2704 value = (end - start) * inferiorMemAvailable;
2706 } else if (theIMetric->style() == EventCounter) {
// Event counters report the delta since the last report; the cumulative
// total is then advanced by that delta.
2707 value = theInstance.getValue();
2708 // assert((value + 0.0001) >= imp->cumulativeValue);
2709 value -= theInstance.getCumulativeValue();
2710 theInstance.bumpCumulativeValueBy(value);
2711 } else if (theIMetric->style() == SampledFunction) {
// Sampled functions report their current value as-is.
2712 value = theInstance.getValue();
2715 theInstance.report(start, end, value);
2716 // calls metricDefinitionNode->forwardSimpleValue()
// Disables every enabled instance of every internal metric.
// For each instance, tp->endOfDataCollection() is called with the
// instance's metric id before the instance is disabled.
2721 void disableAllInternalMetrics() {
2722 for (unsigned u=0; u < internalMetric::allInternalMetrics.size(); u++) {
2723 internalMetric *theIMetric = internalMetric::allInternalMetrics[u];
2725 // Now loop thru all the enabled instances of this internal metric...
// Always operates on instance 0; disableInstance(0) presumably shrinks the
// enabled set, which is what lets this loop terminate -- confirm.
2726 while (theIMetric->num_enabled_instances() > 0) {
2727 internalMetric::eachInstance &theInstance = theIMetric->getEnabledInstance(0);
2728 tp->endOfDataCollection(theInstance.getMId());
2729 theIMetric->disableInstance(0);
2734 #if defined(MT_THREAD)
2736 void dataReqNode::updateCounterTimerVectorMT(process *proc, int CTid,
2740 #if defined(MT_DEBUG)
2741 sprintf(errorLine,"(pid=%d) Creating new Counter/Timer, CTid is %d\n",proc->pid, CTid);
2745 unsigned tableAddr,addr;
2748 assert(CTaddr && proc);
2750 // Getting thread table address
2751 tableAddr = proc->findInternalAddress("DYNINSTthreadTable",true, err);
2754 #if defined(MT_DEBUG)
2755 sprintf(errorLine,"DYNINSTthreadTable address is 0x%x\n",tableAddr);
2760 // This should be a parameter to this function for the multi-threaded case
2761 Thread *thr = proc->threads[0];
2763 // Find the right CTvector address for this thread in the threadTable
2764 tableAddr += thr->get_pos()*sizeof(unsigned);
2766 // Check if the CTvector has enough room for another element. If not,
2767 // allocate more memory for it.
2768 // NOTE: for the final implementation, this has to be done for every
2770 ok = thr->CTvector->update(tableAddr);
2773 // Read address of vector of counter/timers for this thread
2774 proc->readDataSpace((caddr_t) tableAddr,
2776 (caddr_t) &addr,true);
2779 thr->CTvector->add(CTid, position);
2781 // Save pointer to Counter/Timer in table of counter/timers
2782 // (using CTid as offset)
2783 addr += sizeof(unsigned)*(position);
2784 proc->writeDataSpace((caddr_t) addr, sizeof(unsigned),