Turns off gap parsing for Dyninst's runtime library and removes compiler warnings
[dyninst.git] / dyninstAPI / src / function.C
1 /*
2  * Copyright (c) 1996-2009 Barton P. Miller
3  * 
4  * We provide the Paradyn Parallel Performance Tools (below
5  * described as "Paradyn") on an AS IS basis, and do not warrant its
6  * validity or performance.  We reserve the right to update, modify,
7  * or discontinue this software at any time.  We shall have no
8  * obligation to supply such updates or modifications or any other
9  * form of support to you.
10  * 
11  * By your use of Paradyn, you understand and agree that we (or any
12  * other person or entity with proprietary rights in Paradyn) are
13  * under no obligation to provide either maintenance services,
14  * update services, notices of latent defects, or correction of
15  * defects for Paradyn.
16  * 
17  * This library is free software; you can redistribute it and/or
18  * modify it under the terms of the GNU Lesser General Public
19  * License as published by the Free Software Foundation; either
20  * version 2.1 of the License, or (at your option) any later version.
21  * 
22  * This library is distributed in the hope that it will be useful,
23  * but WITHOUT ANY WARRANTY; without even the implied warranty of
24  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
25  * Lesser General Public License for more details.
26  * 
27  * You should have received a copy of the GNU Lesser General Public
28  * License along with this library; if not, write to the Free Software
29  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
30  */
31  
32 // $Id: function.C,v 1.10 2005/03/02 19:44:45 bernat Exp 
33
34 #include "function.h"
35 #include "process.h"
36 #include "instPoint.h"
37 #include "multiTramp.h"
38
39 #include "mapped_object.h"
40 #include "mapped_module.h"
41 #include "InstructionDecoder.h"
42 #include "parseAPI/src/InstrucIter.h"
43
44 #if defined(cap_relocation)
45 #include "reloc-func.h"
46 #endif
47
48 //std::string int_function::emptyString("");
49
50 #include "Parsing.h"
51
52 using namespace Dyninst;
53 using namespace Dyninst::ParseAPI;
54
55
// Number of int_function objects built by the image_func-based constructor.
// It is only incremented (and periodically printed to stderr) when
// ROUGH_MEMORY_PROFILE is defined; otherwise it stays at zero.
int int_function_count = 0;
57
58 // 
59 int_function::int_function(image_func *f,
60                            Address baseAddr,
61                            mapped_module *mod) :
62     ifunc_(f),
63     mod_(mod),
64     blockIDmap(intHash),
65     handlerFaultAddr_(0),
66     handlerFaultAddrAddr_(0), 
67     isBeingInstrumented_(false),
68     instPsByAddr_(addrHash4),
69 #if defined(cap_relocation)
70     generatedVersion_(0),
71     installedVersion_(0),
72     linkedVersion_(0),
73 #endif
74     version_(0)
75 #if defined(os_windows) 
76    , callingConv(unknown_call)
77    , paramSize(0)
78 #endif
79 {
80 #if defined(ROUGH_MEMORY_PROFILE)
81     int_function_count++;
82     if ((int_function_count % 1000) == 0)
83         fprintf(stderr, "int_function_count: %d (%d)\n",
84                 int_function_count, int_function_count*sizeof(int_function));
85 #endif
86     
87
88     addr_ = f->getOffset() + baseAddr;
89     ptrAddr_ = (f->getPtrOffset() ? f->getPtrOffset() + baseAddr : 0);
90
91     parsing_printf("%s: creating new proc-specific function at 0x%lx\n",
92                    symTabName().c_str(), addr_);
93
94      // We delay the creation of instPoints until they are requested;
95     // this saves memory, and really, until something is asked for we
96     // don't need it.  TODO: creation of an arbitrary instPoint should
97     // trigger instPoint creation; someone may create an arbitrary at
98     // a entry/exit/callsite.
99
100     // Same with the flowGraph; we clone it from the image_func when
101     // we need it.
102     
103     /* IA-64: create the cached allocs lazily. */
104 }
105
106 int_function::int_function(const int_function *parFunc,
107                            mapped_module *childMod,
108                            process *childP) :
109     addr_(parFunc->addr_),
110     ptrAddr_(parFunc->ptrAddr_),
111     ifunc_(parFunc->ifunc_),
112     mod_(childMod),
113     blockIDmap(intHash),
114     handlerFaultAddr_(0),
115     handlerFaultAddrAddr_(0), 
116     isBeingInstrumented_(parFunc->isBeingInstrumented_),
117     instPsByAddr_(addrHash4),
118 #if defined(cap_relocation)
119     generatedVersion_(parFunc->generatedVersion_),
120     installedVersion_(parFunc->installedVersion_),
121     linkedVersion_(parFunc->linkedVersion_),
122 #endif
123     version_(parFunc->version_)
124  {
125      unsigned i; // Windows hates "multiple definitions"
126
127      // Construct the raw blocklist;
128      set< int_basicBlock* , int_basicBlock::compare >::const_iterator 
129          bIter = parFunc->blockList.begin();
130      for (i=0; bIter != parFunc->blockList.end(); i++,bIter++) {
131          int_basicBlock *block = new int_basicBlock((*bIter), this,i);
132          blockList.insert(block);
133      }
134      nextBlockID = i;
135      // got the same blocks in the same order as the parent, so this is safe:
136      blockIDmap = parFunc->blockIDmap;
137      
138      for (i = 0; i < parFunc->entryPoints_.size(); i++) {
139          instPoint *parP = parFunc->entryPoints_[i];
140          int_basicBlock *block = findBlockByAddr(parP->addr());
141          assert(block);
142          instPoint *childIP = instPoint::createForkedPoint(parP, block, childP);
143          entryPoints_.push_back(childIP);
144      }
145
146      for (i = 0; i < parFunc->exitPoints_.size(); i++) {
147          instPoint *parP = parFunc->exitPoints_[i];
148          int_basicBlock *block = findBlockByAddr(parP->addr());
149          assert(block);
150          instPoint *childIP = instPoint::createForkedPoint(parP, block, childP);
151          exitPoints_.push_back(childIP);
152      }
153
154      for (i = 0; i < parFunc->callPoints_.size(); i++) {
155          instPoint *parP = parFunc->callPoints_[i];
156          int_basicBlock *block = findBlockByAddr(parP->addr());
157          assert(block);
158          instPoint *childIP = instPoint::createForkedPoint(parP, block, childP);
159          callPoints_.push_back(childIP);
160      }
161
162      for (i = 0; i < parFunc->arbitraryPoints_.size(); i++) {
163          instPoint *parP = parFunc->arbitraryPoints_[i];
164          int_basicBlock *block = findBlockByAddr(parP->addr());
165          assert(block);
166          instPoint *childIP = instPoint::createForkedPoint(parP, block, childP);
167          arbitraryPoints_.push_back(childIP);
168      }
169
170      set<instPoint*>::const_iterator pIter;
171      for(pIter = parFunc->unresolvedPoints_.begin(); 
172          pIter != parFunc->unresolvedPoints_.end(); pIter++) 
173      {
174          instPoint *parP = *pIter;
175          int_basicBlock *block = findBlockByAddr(parP->addr());
176          assert(block);
177          instPoint *childIP = instPoint::createForkedPoint(parP, block, childP);
178          unresolvedPoints_.insert(childIP);
179      }
180
181      for(pIter = parFunc->abruptEnds_.begin(); 
182          pIter != parFunc->abruptEnds_.end(); pIter++) 
183      {
184          instPoint *parP = *pIter;
185          int_basicBlock *block = findBlockByAddr(parP->addr());
186          assert(block);
187          instPoint *childIP = instPoint::createForkedPoint(parP, block, childP);
188          abruptEnds_.insert(childIP);
189      }
190
191      // TODO: relocated functions
192 }
193
194 int_function::~int_function() { 
195     // ifunc_ doesn't keep tabs on us, so don't need to let it know.
196     // mod_ is cleared by the mapped_object
197     // blockList isn't allocated
198
199     // instPoints are process level (should be deleted here and refcounted)
200     // DEMO: incorrectly delete instPoints here
201
202     // some points are in multiple categories, keep the pointers around
203     // in delPoints so we don't double-free
204     std::set<instPoint*> delPoints;
205
206     for (unsigned i = 0; i < entryPoints_.size(); i++) {
207         delPoints.insert(entryPoints_[i]);
208     }
209     for (unsigned i = 0; i < exitPoints_.size(); i++) {
210         delPoints.insert(exitPoints_[i]);
211     }
212     for (unsigned i = 0; i < callPoints_.size(); i++) {
213         delPoints.insert(callPoints_[i]);
214     }
215     for (unsigned i = 0; i < arbitraryPoints_.size(); i++) {
216         delPoints.insert(arbitraryPoints_[i]);
217     }
218     set<instPoint*>::iterator pIter = unresolvedPoints_.begin();
219     for(; pIter != unresolvedPoints_.end(); pIter++) {
220         delPoints.insert(*pIter);
221     }
222     for (pIter = abruptEnds_.begin(); pIter != abruptEnds_.end(); pIter++) {
223         delPoints.insert(*pIter);
224     }
225     for (pIter = delPoints.begin(); delPoints.end() != pIter; pIter++) {
226         delete (*pIter);
227     }
228
229     // int_basicBlocks
230     set< int_basicBlock* , int_basicBlock::compare >::iterator
231         bIter = blockList.begin();
232     for (; bIter != blockList.end(); bIter++) {
233         delete *bIter;
234     }
235
236 #if defined(cap_relocation)
237     for (unsigned i = 0; i < enlargeMods_.size(); i++)
238         delete enlargeMods_[i];
239 #if defined (cap_use_pdvector)
240     enlargeMods_.zap();
241 #else
242     enlargeMods_.clear();
243 #endif
244 #endif
245     
246     for (unsigned i = 0; i < parallelRegions_.size(); i++)
247       delete parallelRegions_[i];
248       
249 }
250
251 // This needs to go away: how is "size" defined? Used bytes? End-start?
252
253 unsigned int_function::getSize_NP()  {
254     blocks();
255     if (blockList.size() == 0) return 0;
256             
257     return ((*blockList.rbegin())->origInstance()->endAddr() - 
258             (*blockList.begin())->origInstance()->firstInsnAddr());
259 }
260
261 void int_function::addArbitraryPoint(instPoint *insp) {
262     arbitraryPoints_.push_back(insp);
263 }
264
265 const pdvector<instPoint *> &int_function::funcEntries() {
266     if (entryPoints_.size() == 0 || obj()->isExploratoryModeOn()) {
267         entryPoints_.clear();
268         pdvector<image_instPoint *> img_entries;
269         ifunc_->funcEntries(img_entries);
270 #if defined (cap_use_pdvector)
271         entryPoints_.reserve_exact(img_entries.size());
272 #endif
273         for (unsigned i = 0; i < img_entries.size(); i++) {
274
275             // TEMPORARY FIX: we're seeing points identified by low-level
276             // code that aren't actually in the function.            
277             Address offsetInFunc = img_entries[i]->offset()-ifunc_->getOffset();
278             // add points that we've already seen
279             if ( instPsByAddr_.find( offsetInFunc + getAddress() ) ) {
280                 entryPoints_.push_back( instPsByAddr_[offsetInFunc + getAddress()] );
281                 continue;
282             }
283             if (!findBlockByOffset(offsetInFunc)) {
284                 fprintf(stderr, "Warning: unable to find block for entry point "
285                         "at 0x%lx (0x%lx) (func 0x%lx to 0x%lx\n",
286                         offsetInFunc,
287                         offsetInFunc+getAddress(),
288                         getAddress(),
289                         getAddress() + getSize_NP());
290                 
291                 continue;
292             }
293
294             instPoint *point = instPoint::createParsePoint(this,
295                                                            img_entries[i]);
296                         if (!point) continue; // Can happen if we double-create
297                         assert(point);
298             entryPoints_.push_back(point);
299         }
300     }
301 #if defined (cap_use_pdvector)
302     entryPoints_.reserve_exact(entryPoints_.size());
303 #endif
304     return entryPoints_;
305 }
306
307 const pdvector<instPoint*> &int_function::funcExits() {
308     if (exitPoints_.size() == 0 || obj()->isExploratoryModeOn()) {
309         exitPoints_.clear();
310         pdvector<image_instPoint *> img_exits;
311         ifunc_->funcExits(img_exits);
312 #if defined (cap_use_pdvector)
313         exitPoints_.reserve_exact(img_exits.size());
314 #endif
315         
316         for (unsigned i = 0; i < img_exits.size(); i++) {
317             // TEMPORARY FIX: we're seeing points identified by low-level
318             // code that aren't actually in the function.            
319             Address offsetInFunc = img_exits[i]->offset()-ifunc_->getOffset();
320             // add points that we've already seen
321             if ( instPsByAddr_.find( offsetInFunc + getAddress() ) ) {
322                 exitPoints_.push_back( instPsByAddr_[offsetInFunc + getAddress()] );
323                 continue;
324             }
325             if (!findBlockByOffset(offsetInFunc)) {
326                 fprintf(stderr, "Warning: unable to find block for exit point at 0x%lx (0x%lx) (func 0x%lx to 0x%lx\n",
327                         offsetInFunc,
328                         offsetInFunc+getAddress(),
329                         getAddress(),
330                         getAddress() + getSize_NP());
331                 
332                 continue;
333             }
334
335             instPoint *point = instPoint::createParsePoint(this,
336                                                            img_exits[i]);
337             if (!point) continue; // Can happen if we double-create
338
339             assert(point);
340             exitPoints_.push_back(point);
341         }
342     }
343 #if defined (cap_use_pdvector)
344     exitPoints_.reserve_exact(exitPoints_.size());
345 #endif
346     return exitPoints_;
347 }
348
349 const pdvector<instPoint*> &int_function::funcCalls() {
350     if (callPoints_.size() == 0 || obj()->isExploratoryModeOn()) {
351         callPoints_.clear();
352         pdvector<image_instPoint *> img_calls;
353         ifunc_->funcCalls(img_calls);
354 #if defined (cap_use_pdvector)
355         callPoints_.reserve_exact(img_calls.size());
356 #endif
357         
358         for (unsigned i = 0; i < img_calls.size(); i++) {
359             // TEMPORARY FIX: we're seeing points identified by low-level
360             // code that aren't actually in the function.            
361             Address offsetInFunc = img_calls[i]->offset()-ifunc_->getOffset();
362             // add points that we've already seen
363             if ( instPsByAddr_.find( offsetInFunc + getAddress() ) ) {
364                 callPoints_.push_back( instPsByAddr_[offsetInFunc + getAddress()] );
365                 continue;
366             }
367             if (!findBlockByOffset(offsetInFunc)) {
368                 fprintf(stderr, "Warning: unable to find block for call point "
369                         "at 0x%lx (0x%lx) (func 0x%lx to 0x%lx, %s/%s)\n",
370                         offsetInFunc,
371                         offsetInFunc+getAddress(),
372                         getAddress(),
373                         getAddress() + getSize_NP(),
374                         symTabName().c_str(),
375                         obj()->fileName().c_str());
376                 debugPrint();
377                 
378                 continue;
379             }
380             instPoint *point = instPoint::createParsePoint(this,
381                                                            img_calls[i]);
382             if (!point) continue; // Can happen if we double-create
383
384             assert(point);
385             callPoints_.push_back(point);
386         }
387     }
388 #if defined (cap_use_pdvector)
389     callPoints_.reserve_exact(callPoints_.size());
390 #endif
391     return callPoints_;
392 }
393
394 const pdvector<instPoint*> &int_function::funcArbitraryPoints() {
395   // We add these per-process, so there's no chance to have
396   // a parse-level list
397     return arbitraryPoints_;
398 }
399
400 const std::set<instPoint*> &int_function::funcUnresolvedControlFlow() 
401 {
402     if ( ! unresolvedPoints_.empty() && ! obj()->isExploratoryModeOn() ) {
403         return unresolvedPoints_;
404     }
405     pdvector<image_instPoint*> imgPoints;
406     ifunc()->funcUnresolvedControlFlow( imgPoints );
407     if ( unresolvedPoints_.size() != imgPoints.size() ) {
408
409         // convert image_instPoints to instPoints, add to set
410         pdvector<image_instPoint*>::iterator pIter = imgPoints.begin();
411         while (pIter != imgPoints.end()) {
412
413             // skip static transfers to known code
414             if ( ! (*pIter)->isDynamic() ) {
415                 codeRange *range = 
416                     proc()->findOrigByAddr((*pIter)->callTarget());
417                 if ( range && ! (range->is_mapped_object()) ) {
418                     pIter++;
419                     continue; 
420                 }
421             }
422
423             // find or create the new instPoint and add it to the vector
424             instPoint *curPoint;
425             Address ptAddr = (*pIter)->offset() 
426                                    + getAddress() 
427                                    - ifunc()->getOffset();
428             if (instPsByAddr_.find(ptAddr)) {
429                 curPoint = instPsByAddr_[ptAddr];
430             } else {
431                 curPoint = instPoint::createParsePoint(this, *pIter);
432             }
433             unresolvedPoints_.insert(curPoint); // std::set eliminates duplicates
434             pIter++;
435         }
436     }
437     return unresolvedPoints_;
438 }
439
440 const set<instPoint*> &int_function::funcAbruptEnds() 
441 {
442     if ( ! abruptEnds_.size() && ! obj()->isExploratoryModeOn() ) {
443         return abruptEnds_;
444     }
445
446     pdvector<image_instPoint*> imgPoints;
447     ifunc()->funcAbruptEnds( imgPoints );
448     if (abruptEnds_.size() != imgPoints.size()) {
449
450         // convert image_instPoints to instPoints, add to set
451         pdvector<image_instPoint*>::iterator pIter = imgPoints.begin();
452         while (pIter != imgPoints.end()) {
453
454             // find or create the new instPoint and add it to the vector
455             instPoint *curPoint;
456             Address ptAddr = (*pIter)->offset() 
457                                    + getAddress() 
458                                    - ifunc()->getOffset();
459             if (instPsByAddr_.find(ptAddr)) {
460                 curPoint = instPsByAddr_[ptAddr];
461             } else {
462                 curPoint = instPoint::createParsePoint(this, *pIter);
463             }
464             abruptEnds_.insert(curPoint); // std::set eliminates duplicates
465             pIter++;
466         }
467     }
468     return abruptEnds_;
469 }
470
471 void int_function::findBlocksByRange(std::vector<int_basicBlock*> &rangeBlocks, 
472                                      Address start, Address end)
473 {
474     std::set< int_basicBlock* , int_basicBlock::compare >::iterator biter;
475     biter = blockList.begin();
476     while (biter != blockList.end()) 
477     {
478         Address bstart = (*biter)->origInstance()->firstInsnAddr();
479         if (start <= bstart && bstart < end) 
480         {
481            rangeBlocks.push_back(*biter);
482         }
483         biter++;
484     }
485     if (rangeBlocks.size() == 0 ) {
486         //make sure we got an un-relocated range, haven't implemented this
487         // for relocated ranges
488         assert ( obj()->codeBase() <= start && 
489                  start < obj()->codeBase() + obj()->get_size() );
490     }
491 }
492
493 bool int_function::removePoint(instPoint *point) 
494 {
495     bool foundPoint = false;
496     if (instPsByAddr_.find(point->addr()))
497         instPsByAddr_.undef(point->addr());
498     switch(point->getPointType()) {
499     case functionEntry:
500         for (unsigned i = 0; !foundPoint && i < entryPoints_.size(); i++) {
501             if (entryPoints_[i] == point) {
502                 if (i < entryPoints_.size()-1) {
503                     entryPoints_[i] = entryPoints_[entryPoints_.size()-1];
504                 }
505                 entryPoints_.pop_back();
506                 foundPoint = true;
507             }
508         }
509         break;
510     case functionExit:
511         for (unsigned i = 0; !foundPoint && i < exitPoints_.size(); i++) {
512             if (exitPoints_[i]->addr() == point->addr()) {
513                 if (i < exitPoints_.size()-1) {
514                     exitPoints_[i] = exitPoints_[exitPoints_.size()-1];
515                 }
516                 exitPoints_.pop_back();
517                 foundPoint = true;
518             }
519         }
520         break;
521     case callSite:
522         for (unsigned i = 0; !foundPoint && i < callPoints_.size(); i++) {
523             if (callPoints_[i] == point) {
524                 if (i < callPoints_.size()-1) {
525                     callPoints_[i] = callPoints_[callPoints_.size()-1];
526                 }
527                 callPoints_.pop_back();
528                 foundPoint = true;
529             }
530         }
531         break;
532     case otherPoint:
533         for (unsigned i = 0; !foundPoint && i < arbitraryPoints_.size(); i++) {
534             if (arbitraryPoints_[i] == point) {
535                 if (i < arbitraryPoints_.size()-1) {
536                     arbitraryPoints_[i] = arbitraryPoints_[arbitraryPoints_.size()-1];
537                 }
538                 arbitraryPoints_.pop_back();
539                 foundPoint = true;
540             }
541         }
542         break;
543     default: // includes noneType
544         assert(0); // unhandled case!
545     }
546     if (unresolvedPoints_.find(point) != unresolvedPoints_.end()) {
547         unresolvedPoints_.erase(point);
548         foundPoint = true;
549     }
550     if (abruptEnds_.find(point) != abruptEnds_.end()) {
551         abruptEnds_.erase(point);
552         foundPoint = true;
553     }
554     if (point->imgPt()) {
555         ifunc()->img()->removeInstPoint(point->imgPt());
556     }
557     assert(foundPoint);
558     return foundPoint;
559 }
560
561 // returns true if a change was made, image layer is called independently
562 bool int_function::setPointResolved(instPoint *point)
563 {
564     bool foundPoint = false;
565     // look in unresolved points
566     set<instPoint*>::iterator pIter = unresolvedPoints_.find( point );
567     if (unresolvedPoints_.end() != pIter) {
568         unresolvedPoints_.erase(pIter);
569         foundPoint = true;
570     } else {
571         // check among the abruptEnd points
572         pIter = abruptEnds_.find( point );
573         if (abruptEnds_.end() != pIter) {
574             abruptEnds_.erase(pIter);
575             foundPoint = true;
576         }
577     }
578
579     // make sure the point is still accessible
580     assert( point == findInstPByAddr(point->addr()) );
581
582     if (!foundPoint) {
583         fprintf(stderr,"WARNING: Tried to resolve point at offset %lx "
584                 "that was already resolved %s[%d]\n",
585                 point->addr(),FILE__,__LINE__);
586     }
587     return foundPoint;
588 }
589
590 // finds new entry point, sets the argument to the new 
591 Address int_function::setNewEntryPoint(int_basicBlock *& newEntry)
592 {
593     newEntry = NULL;
594
595     // find block with no intraprocedural entry edges
596     assert(blockList.size());
597     set< int_basicBlock* , int_basicBlock::compare >::iterator bIter;
598     for (bIter = blockList.begin(); 
599          bIter != blockList.end(); 
600          bIter++) 
601         {
602             SingleContext epred(ifunc(),true,true);
603             Block::edgelist & ib_ins = (*bIter)->llb()->sources();
604             Block::edgelist::iterator eit = ib_ins.begin(&epred);
605             if (eit != ib_ins.end()) {
606                 assert(!newEntry);
607                 newEntry = *bIter;
608             }
609         }
610     if( ! newEntry) {
611         newEntry = *blockList.begin();
612     }
613
614     assert(!newEntry->llb()->isShared()); //KEVINTODO: unimplemented case
615
616     //create and add an entry point for the image_func
617     int insn_size = 0;
618     unsigned char * insn_buf = (unsigned char *) obj()->getPtrToInstruction
619         (newEntry->origInstance()->firstInsnAddr());
620 #if defined(cap_instruction_api)
621     using namespace InstructionAPI;
622     InstructionDecoder dec
623         (insn_buf,InstructionDecoder::maxInstructionLength,proc()->getArch());
624     Instruction::Ptr insn = dec.decode();
625     if(insn)
626         insn_size = insn->size();
627 #else
628     InstrucIter ah(newEntry->origInstance()->firstInsnAddr(),
629                    newEntry->origInstance()->getSize(),
630                    proc());
631     instruction insn = ah.getInstruction();
632     insn_size = insn.size();
633 #endif
634     image_instPoint *imgPoint = new image_instPoint(
635         newEntry->llb()->firstInsnOffset(),
636         insn_buf,
637         insn_size,
638         ifunc()->img(),
639         functionEntry);
640     ifunc()->img()->addInstPoint(imgPoint);
641
642     // create and add an entry point for the int_func
643     instPoint *point = 
644         findInstPByAddr( newEntry->origInstance()->firstInsnAddr() );
645     if (NULL == point) {
646         point = instPoint::createParsePoint(this, imgPoint);
647     }
648         assert(point);
649     entryPoints_.push_back(point);
650
651     // change function base address
652     addr_ = newEntry->origInstance()->firstInsnAddr();
653     return newEntry->origInstance()->firstInsnAddr();
654 }
655
656 /* 0. The target and source must be in the same mapped region, make sure memory
657  *    for the target is up to date
658  * 1. Parse from target address, add new edge at image layer
659  * 2. Add image blocks as int_basicBlocks
660  * 3. Add image points, as instPoints, fix up mapping of split blocks with points
661  * 4. Register all newly created functions as a result of new edge parsing
662 */
663 bool int_function::parseNewEdges( std::vector<ParseAPI::Block*> &sources, 
664                                   std::vector<Address> &targets, 
665                                   std::vector<EdgeTypeEnum> &edgeTypes)
666 {
667     using namespace SymtabAPI;
668
669 /* 0. The target and source must be in the same mapped region, make sure memory
670       for the target is up to date */
671
672     // Update set of active multiTramps before parsing 
673     if (NULL != proc()->proc()) {// if it's a process and not a binEdit
674         proc()->proc()->updateActiveMultis();
675     }
676
677     Address loadAddr = getAddress() - ifunc()->getOffset();
678     std::set<Region*> targRegions;
679     for (unsigned idx=0; idx < targets.size(); idx++) {
680         Region *targetRegion = ifunc()->img()->getObject()->
681             findEnclosingRegion( targets[idx]-loadAddr );
682
683         // same region check
684         if (NULL != sources[idx]) {
685             Region *sourceRegion = ifunc()->img()->getObject()->
686                 findEnclosingRegion( sources[idx]->start() );
687             assert(targetRegion == sourceRegion );
688
689         }
690         // update target region
691         if (targRegions.end() == targRegions.find(targetRegion)) {
692             obj()->updateMappedFileIfNeeded(targets[idx],targetRegion);
693             targRegions.insert(targetRegion);
694         }
695         // translate targets to memory offsets rather than absolute addrs
696         targets[idx] -= loadAddr;
697     }
698
699 /* 1. Parse from target address, add new edge at image layer  */
700     assert( !ifunc()->img()->hasSplitBlocks() && 
701             !ifunc()->img()->hasNewBlocks());
702     // parses and adds new blocks to image-layer datastructures
703     ifunc()->img()->codeObject()->parseNewEdges(sources, targets, edgeTypes);
704
705 /* 2. Add function blocks to int-level datastructures         */
706     //vector<int_basicBlock*> newBlocks;
707     addMissingBlocks(); // also adds to mapped_object and re-sizes function
708
709 /* 3. Add image points to int-level datastructures, fix up mapping of split blocks with points */
710     addMissingPoints();
711     // See if block splitting has caused problems with existing points
712     if (ifunc()->img()->hasSplitBlocks()) {
713         obj()->splitIntLayer();
714         ifunc()->img()->clearSplitBlocks();
715     }
716
717 /* 4. Register all newly created image_funcs as a result of new edge parsing */
718     pdvector<int_function*> intfuncs;
719     obj()->getAllFunctions(intfuncs);
720
721     return true;
722 }
723
724
725 void int_function::setHandlerFaultAddr(Address fa) 
726
727     handlerFaultAddr_ = fa; 
728 }
729
730 // Sets the address in the structure at which the fault instruction's
731 // address is stored if "set" is true.  Accesses the fault address and 
732 // translates it back to an original address if it corresponds to 
733 // relocated code in the Dyninst heap 
734 void int_function::setHandlerFaultAddrAddr(Address faa, bool set) 
735
736     if (set) {
737         // save the faultAddrAddr
738         handlerFaultAddrAddr_ = faa; 
739     }
740
741     // get the faultAddr 
742     assert(proc()->proc());
743     assert(sizeof(Address) == proc()->getAddressWidth());
744     Address faultAddr=0;
745     assert (proc()->readDataSpace
746         ((void*)faa, proc()->getAddressWidth(), (void*)&faultAddr, true));
747
748     // translate the faultAddr back to an original address, and if
749     // that translation was necessary, save it to the faultAddrAddr in the 
750     // CONTEXT struct
751     if (proc()->proc()->isRuntimeHeapAddr(faultAddr)) {
752         codeRange *range = proc()->findOrigByAddr(faultAddr);
753         if (range->is_multitramp()) {
754             faultAddr = range->is_multitramp()->instToUninstAddr(faultAddr);
755             range = proc()->findOrigByAddr( faultAddr );
756         }
757         bblInstance *curbbi = range->is_basicBlockInstance();
758         assert(curbbi);
759         faultAddr = curbbi->equivAddr(0,faultAddr);
760         assert( proc()->writeDataSpace((void*)faa, 
761                                        sizeof(Address), 
762                                        (void*)&faultAddr) );
763     }
764 }
765
766 // Set the handler return addr to the most recent instrumented or
767 // relocated address, similar to instPoint::instrSideEffect.
768 // Also, make sure that we update our mapped view of memory, 
769 // we may have overwritten memory that was previously not code
770 void int_function::fixHandlerReturnAddr(Address faultAddr)
771 {
772     if ( !proc()->proc() || ! handlerFaultAddrAddr_ ) {
773         assert(0);
774         return;
775     }
776     bblInstance *bbi = proc()->findOrigByAddr(faultAddr)->
777         is_basicBlockInstance();
778     if (bbi) {
779         // get relocated PC address
780         Address newPC = bbi->equivAddr(bbi->func()->version(), faultAddr);
781         multiTramp *multi = proc()->findMultiTrampByAddr(newPC);
782         if (multi) {
783             newPC = multi->uninstToInstAddr(newPC);
784         }
785         assert(newPC);
786         assert(proc()->proc()->getAddressWidth() == sizeof(Address));
787         if (newPC != faultAddr) {
788             assert( proc()->writeDataSpace((void*)handlerFaultAddrAddr_, 
789                                            sizeof(Address), 
790                                            (void*)&newPC) );
791         }
792     }
793 }
794
795 // doesn't delete the ParseAPI::Block's, those are removed in a batch
796 // call to the parseAPI
797 void int_function::deleteBlock(int_basicBlock* block) 
798 {
799     // init stuff
800     assert(block && this == block->func());
801     bblInstance *origbbi = block->origInstance();
802     image_basicBlock *imgBlock = block->llb();
803     assert( ! imgBlock->isShared() ); //KEVINTODO: unimplemented case
804     Address baseAddr = ifunc()->img()->desc().loadAddr();
805
806     // remove points
807     pdvector<image_instPoint*> imgPoints;
808     ifunc()->img()->getInstPoints( origbbi->firstInsnAddr(), 
809                                    origbbi->endAddr(), 
810                                    imgPoints );
811     for (unsigned pidx=0; pidx < imgPoints.size(); pidx++) {
812         image_instPoint *imgPt = imgPoints[pidx];
813         instPoint *point = findInstPByAddr( imgPt->offset() + baseAddr );
814         if (!point) {
815             addMissingBlocks();
816             point = findInstPByAddr( imgPt->offset() + baseAddr );
817         }
818         point->removeMultiTramps();
819         removePoint( point );
820     }
821
822
823     // Remove block from int-level datastructures 
824     pdvector<bblInstance*> bbis = block->instances();
825     for (unsigned bIdx = 1; bIdx < bbis.size(); bIdx++) 
826     {   // the original instance is not in the process range
827         proc()->removeOrigRange(bbis[bIdx]);
828     }
829     for (unsigned bidx=0; bidx < block->instances_.size(); bidx++) {
830         deleteBBLInstance(block->instances_[bidx]);
831     }
832     blockList.erase(block);
833     obj()->removeRange(block->origInstance());
834     
835     // delete block? 
836     delete(block);
837 }
838
839 // Remove funcs from:
840 //   mapped_object & mapped_module datastructures
841 //   addressSpace::textRanges codeRangeTree<int_function*> 
842 //   image-level & SymtabAPI datastructures
843 //   BPatch_addressSpace::BPatch_funcMap <int_function -> BPatch_function>
844 void int_function::removeFromAll() 
845 {
846     mal_printf("purging blocklist of size = %d\n",blockList.size());
847     set< int_basicBlock* , int_basicBlock::compare >::const_iterator bIter;
848     for (bIter = blockList.begin(); 
849          bIter != blockList.end(); 
850          bIter++) 
851     {
852         bblInstance *bbi = (*bIter)->origInstance();
853         mal_printf("block [%lx %lx]\n",bbi->firstInsnAddr(), bbi->endAddr());
854     }
855     // delete blocks 
856     for (bIter = blockList.begin(); 
857          bIter != blockList.end();
858          bIter = blockList.begin()) 
859     {
860         deleteBlock(*bIter);// removes block from blockList too
861     }
862     // remove from mapped_object & mapped_module datastructures
863     obj()->removeFunction(this);
864     mod()->removeFunction(this);
865
866     // remove points
867     entryPoints_.clear();
868     callPoints_.clear();
869     exitPoints_.clear();
870     arbitraryPoints_.clear();
871     abruptEnds_.clear();
872     unresolvedPoints_.clear();
873     instPsByAddr_.clear();
874
875     // remove func & blocks from image, ParseAPI, & SymtabAPI datastructures
876     ifunc()->img()->deleteFunc(ifunc());
877 }
878
879 void int_function::addMissingBlock(image_basicBlock & imgBlock)
880 {
881     Address baseAddr = this->getAddress() - ifunc()->getOffset();
882     int_basicBlock *intBlock = findBlockByAddr( 
883         imgBlock.firstInsnOffset() + baseAddr );
884
885     if ( intBlock && &imgBlock != intBlock->llb() ) 
886     {
887         // the block was split during parsing, (or there's real block 
888         // overlapping) adjust the end and lastInsn fields of both 
889         // bblInstances 
890         bblInstance *curInst = intBlock->origInstance();
891         image_basicBlock *curImgB = intBlock->llb();
892         Address blockBaseAddr = curInst->firstInsnAddr() - 
893             curImgB->firstInsnOffset();
894         curInst->setEndAddr( curImgB->endOffset() + blockBaseAddr );
895         curInst->setLastInsnAddr( curImgB->lastInsnOffset() + blockBaseAddr );
896         // instance 2
897         bblInstance *otherInst = findBlockInstanceByAddr
898                         (imgBlock.firstInsnOffset() + baseAddr);
899         if (otherInst && otherInst != curInst) {
900             curInst = otherInst;
901             curImgB = intBlock->llb();
902             blockBaseAddr = curInst->firstInsnAddr() - 
903                 curImgB->firstInsnOffset();
904             curInst->setEndAddr( curImgB->endOffset() + blockBaseAddr );
905             curInst->setLastInsnAddr(curImgB->lastInsnOffset() + blockBaseAddr);
906         }
907
908         // now try and find the block again
909         int_basicBlock *newIntBlock = findBlockByAddr( 
910             imgBlock.firstInsnOffset() + baseAddr );
911         if (intBlock == newIntBlock) {
912             // there's real overlapping going on, find the intBlock 
913             // that starts at the right address (if there is one) 
914             mal_printf("WARNING: overlapping blocks, major obfuscation or "
915                     "bad parse [%lx %lx] [%lx %lx] %s[%d]\n",
916                     intBlock->origInstance()->firstInsnAddr(), 
917                     intBlock->origInstance()->endAddr(), 
918                     baseAddr + imgBlock.firstInsnOffset(), 
919                     baseAddr + imgBlock.endOffset(), 
920                     FILE__,__LINE__);
921             
922             intBlock = NULL;
923             for (set<int_basicBlock*,int_basicBlock::compare>::iterator 
924                  bIter = blockList.begin();
925                  bIter != blockList.end(); 
926                  bIter++) 
927             {
928                 if ( (baseAddr + imgBlock.firstInsnOffset()) == 
929                      (*bIter)->origInstance()->firstInsnAddr() )
930                 {
931                     intBlock = (*bIter);
932                     break;
933                 }
934             }
935         } else {
936             intBlock = newIntBlock;
937         }
938     }
939
940     if ( ! intBlock ) {
941         // create new int_basicBlock and add it to our datastructures
942         intBlock = new int_basicBlock
943             ( &imgBlock, baseAddr, this, nextBlockID );
944         bblInstance *bbi = intBlock->origInstance();
945         assert(bbi);
946         blocksByAddr_.insert(bbi);
947         nextBlockID++;
948         blockList.insert(intBlock);
949         blockIDmap[imgBlock.id()] = blockIDmap.size();
950     } 
951 }
952
953
954 /* Find image_basicBlocks that are missing from these datastructures and add
955  * them.  The int_basicBlock constructor does pretty much all of the work in
956  * a chain of side-effects extending all the way into the mapped_object class
957  * 
958  * We have to take into account that additional parsing may cause basic block splitting,
959  * in which case it is necessary not only to add new int-level blocks, but to update 
960  * int_basicBlock, bblInstance, and BPatch_basicBlock objects. 
961  */
962 void int_function::addMissingBlocks()
963 {
964     if ( blockList.empty() ) 
965         blocks();
966
967     Function::blocklist & imgBlocks = ifunc_->blocks();
968     Function::blocklist::iterator sit = imgBlocks.begin();
969     for( ; sit != imgBlocks.end(); ++sit) {
970         addMissingBlock( *dynamic_cast<image_basicBlock*>(*sit) );
971     }
972 }
973
974 /* trigger search in image_layer points vectors to be added to int_level 
975  * datastructures
976  */
977 void int_function::addMissingPoints()
978 {
979     // the "true" parameter causes the helper functions to search for new 
980     // points in the image, bypassing cached points
981     funcEntries();
982     funcExits();
983     funcCalls();
984     funcUnresolvedControlFlow();
985     funcAbruptEnds();
986 }
987
988 // get instPoints of known function callsinto this one
989 void int_function::getCallerPoints(std::vector<instPoint*>& callerPoints)
990 {
991     int_basicBlock *entryBlock = findBlockByAddr(getAddress());
992     assert(entryBlock);
993     pdvector<int_basicBlock*> sourceBlocks;
994     entryBlock->getSources(sourceBlocks);
995     for (unsigned bIdx=0; bIdx < sourceBlocks.size(); bIdx++) {
996         instPoint *callPoint = sourceBlocks[bIdx]->func()->findInstPByAddr
997             (sourceBlocks[bIdx]->origInstance()->lastInsnAddr());
998         if (!callPoint) {
999             sourceBlocks[bIdx]->func()->funcCalls();
1000             callPoint = sourceBlocks[bIdx]->func()->findInstPByAddr
1001                 (sourceBlocks[bIdx]->origInstance()->lastInsnAddr());
1002         }
1003         if (callPoint) {
1004             callerPoints.push_back(callPoint);
1005         }
1006     }
1007 }
1008
1009
1010 instPoint *int_function::findInstPByAddr(Address addr) {
1011     // This only finds instPoints that have been previously created...
1012     // so don't bother parsing. 
1013     
1014     if (instPsByAddr_.find(addr))
1015         return instPsByAddr_[addr];
1016
1017     // The above should have been sufficient... however, if we forked and have
1018     // a baseTramp that does not contain instrumentation, then there will never
1019     // be a instPointInstance created, and so no entry in instPsByAddr_. Argh.
1020     // So, if the lookup above failed, do the slow search through entries, 
1021     // exits, and calls - arbitraries should already exist.
1022     for (unsigned i = 0; i < entryPoints_.size(); i++) {
1023         if (entryPoints_[i]->addr() == addr) {
1024             instPsByAddr_[addr] = entryPoints_[i];
1025             return entryPoints_[i];
1026         }
1027     }
1028     for (unsigned i = 0; i < exitPoints_.size(); i++) {
1029         if (exitPoints_[i]->addr() == addr) {
1030             instPsByAddr_[addr] = exitPoints_[i];
1031             return exitPoints_[i];
1032         }
1033     }
1034     for (unsigned i = 0; i < callPoints_.size(); i++) {
1035         if (callPoints_[i]->addr() == addr) {
1036             instPsByAddr_[addr] = callPoints_[i];
1037             return callPoints_[i];
1038         }
1039     }
1040     std::set< instPoint* >::iterator pIter = unresolvedPoints_.begin();
1041     while ( pIter != unresolvedPoints_.end() ) {
1042         if ( (*pIter)->addr() == addr ) {
1043             instPsByAddr_[addr] = *pIter;
1044             return *pIter;
1045         }
1046         pIter++;
1047     }
1048     pIter = abruptEnds_.begin();
1049     while ( pIter != abruptEnds_.end() ) {
1050         if ( (*pIter)->addr() == addr ) {
1051             instPsByAddr_[addr] = *pIter;
1052             return *pIter;
1053         }
1054         pIter++;
1055     }
1056
1057     return NULL;
1058 }
1059
1060 void int_function::registerInstPointAddr(Address addr, instPoint *inst) {
1061     instPoint *oldInstP = findInstPByAddr(addr);
1062     if (oldInstP) assert(inst == oldInstP);
1063
1064     instPsByAddr_[addr] = inst;
1065 }
1066
1067 void int_function::unregisterInstPointAddr(Address addr, instPoint* inst) {
1068     instPoint *oldInstP = findInstPByAddr(addr);
1069     assert(oldInstP == inst);
1070
1071     instPsByAddr_.undef(addr);
1072 }
1073
1074 void print_func_vector_by_pretty_name(std::string prefix,
1075                                       pdvector<int_function *>*funcs) {
1076     unsigned int i;
1077     int_function *func;
1078     for(i=0;i<funcs->size();i++) {
1079       func = ((*funcs)[i]);
1080       cerr << prefix << func->prettyName() << endl;
1081     }
1082 }
1083
1084 mapped_module *int_function::mod() const { return mod_; }
1085 mapped_object *int_function::obj() const { return mod()->obj(); }
1086 AddressSpace *int_function::proc() const { return obj()->proc(); }
1087
1088 bblInstance *int_function::findBlockInstanceByAddr(Address addr) {
1089     codeRange *range;
1090     if (blockList.empty()) {
1091         // Will make the block list...
1092         blocks();
1093     }
1094     
1095     if (blocksByAddr_.find(addr, range)) {
1096         assert(range->is_basicBlockInstance());
1097         return range->is_basicBlockInstance();
1098     }
1099     return NULL;
1100 }
1101
1102 int_basicBlock *int_function::findBlockByAddr(Address addr) {
1103     bblInstance *inst = findBlockInstanceByAddr(addr);
1104     if (inst)
1105         return inst->block();
1106     else
1107         return NULL;
1108 }
1109
1110
1111 const std::set<int_basicBlock*,int_basicBlock::compare> &int_function::blocks()
1112 {
1113     int i = 0;
1114
1115     if (blockList.empty()) {
1116         Address base = getAddress() - ifunc_->getOffset();
1117
1118         Function::blocklist & img_blocks = ifunc_->blocks();
1119         Function::blocklist::iterator sit = img_blocks.begin();
1120
1121         for( ; sit != img_blocks.end(); ++sit) {
1122             image_basicBlock *b = (image_basicBlock*)*sit;
1123             blockList.insert( new int_basicBlock(b, base, this, i) );
1124             blockIDmap[b->id()] = i;
1125             ++i;
1126         }
1127         nextBlockID = i;
1128     }
1129     return blockList;
1130 }
1131
1132 AddressSpace *int_basicBlock::proc() const {
1133     return func()->proc();
1134 }
1135
1136 // Note that code sharing is masked at this level. That is, edges
1137 // to and from a block that do not originate from the low-level function
1138 // that this block's int_function represents will not be included in
1139 // the returned block collection
1140 void int_basicBlock::getSources(pdvector<int_basicBlock *> &ins) const {
1141
1142     /* Only allow edges that are within this current function; hide sharing */
1143     /* Also avoid CALL and RET edges */
1144     SingleContext epred(func()->ifunc(),true,true);
1145     Intraproc epred2(&epred);
1146
1147     Block::edgelist & ib_ins = ib_->sources();
1148     Block::edgelist::iterator eit = ib_ins.begin(&epred2);
1149
1150     for( ; eit != ib_ins.end(); ++eit) {
1151         // FIXME debugging assert
1152         assert((*eit)->type() != CALL && (*eit)->type() != RET);
1153
1154         image_basicBlock * sb = (image_basicBlock*)(*eit)->src();
1155         int_basicBlock *sblock = func()->findBlockByAddr
1156             ( sb->start() + 
1157               func()->getAddress() - 
1158               func()->ifunc()->getOffset() );
1159         if (!sblock) {
1160             fprintf(stderr,"ERROR: no corresponding intblock for "
1161                     "imgblock #%d at 0x%lx %s[%d]\n", ib_->id(),
1162                     ib_->firstInsnOffset(),FILE__,__LINE__); 
1163             assert(0);
1164         }
1165         ins.push_back( sblock );
1166     }
1167 }
1168
1169 void int_basicBlock::getTargets(pdvector<int_basicBlock *> &outs) const {
1170     SingleContext epred(func()->ifunc(),true,true);
1171     Intraproc epred2(&epred);
1172     NoSinkPredicate epred3(&epred2);
1173
1174     Block::edgelist & ib_outs = ib_->targets();
1175     Block::edgelist::iterator eit = ib_outs.begin(&epred3);
1176
1177     for( ; eit != ib_outs.end(); ++eit) {
1178         // FIXME debugging assert
1179         assert((*eit)->type() != CALL && (*eit)->type() != RET);
1180         image_basicBlock * tb = (image_basicBlock*)(*eit)->trg();
1181         int_basicBlock* tblock = func()->findBlockByAddr
1182             ( tb->start() + 
1183               func()->getAddress() - 
1184               func()->ifunc()->getOffset() );
1185         if (!tblock) {
1186             fprintf(stderr,"ERROR: no corresponding intblock for "
1187                     "imgblock #%d at 0x%lx %s[%d]\n", ib_->id(),
1188                     ib_->firstInsnOffset(),FILE__,__LINE__);                    
1189             assert(0);
1190         }
1191         outs.push_back(tblock);
1192     }
1193 }
1194
1195 EdgeTypeEnum int_basicBlock::getTargetEdgeType(int_basicBlock * target) const {
1196     SingleContext epred(func()->ifunc(),true,true);
1197     Block::edgelist & ib_outs = ib_->targets();
1198     Block::edgelist::iterator eit = ib_outs.begin(&epred);
1199     for( ; eit != ib_outs.end(); ++eit)
1200         if((*eit)->trg() == target->ib_)
1201             return (*eit)->type();
1202     return NOEDGE;
1203 }
1204
1205 EdgeTypeEnum int_basicBlock::getSourceEdgeType(int_basicBlock *source) const {
1206     SingleContext epred(func()->ifunc(),true,true);
1207     Block::edgelist & ib_ins = ib_->sources();
1208     Block::edgelist::iterator eit = ib_ins.begin(&epred);
1209     for( ; eit != ib_ins.end(); ++eit)
1210         if((*eit)->src() == source->ib_)
1211             return (*eit)->type();
1212     return NOEDGE;
1213 }
1214
1215 int_basicBlock *int_basicBlock::getFallthrough() const {
1216     SingleContext epred(func()->ifunc(),true,true);
1217     NoSinkPredicate epred2(&epred);
1218     Block::edgelist & ib_outs = ib_->targets();
1219     Block::edgelist::iterator eit = ib_outs.begin(&epred2);
1220     for( ; eit != ib_outs.end(); ++eit) {
1221         Edge * e = *eit;
1222         if(e->type() == FALLTHROUGH ||
1223            e->type() == CALL_FT ||
1224            e->type() == COND_NOT_TAKEN)
1225         {
1226             return func()->findBlockByAddr
1227                 ( ((image_basicBlock*)e->trg())->firstInsnOffset() + 
1228                   func()->getAddress()-func()->ifunc()->getOffset() );
1229         }
1230     }
1231     return NULL;
1232 }
1233
1234 bool int_basicBlock::needsRelocation() const {
1235    if(ib_->isShared() || ib_->needsRelocation()) {
1236         // If we've _already_ relocated, then we're no longer shared
1237         // because we have our own copy.
1238
1239         if (instances_.size() > 1) {
1240             return false;
1241         }
1242
1243         // We have only the one instance, so we're still shared.
1244         return true;
1245     }
1246     //else if(isEntryBlock() && func()->containsSharedBlocks())
1247     //    return true;
1248     else
1249         return false;
1250 }
1251
1252 bool int_basicBlock::isEntryBlock() const { 
1253     return ib_->isEntryBlock(func_->ifunc());
1254 }
1255
1256 unsigned int_function::getNumDynamicCalls()
1257 {
1258    unsigned count=0;
1259    pdvector<instPoint *> callPoints = funcCalls();
1260
1261    for (unsigned i=0; i<callPoints.size(); i++)
1262    {
1263       if (callPoints[i]->isDynamic())
1264           count++;
1265    }
1266    return count;
1267 }
1268
1269 const string &int_function::symTabName() const { 
1270     return ifunc_->symTabName(); 
1271 }
1272
1273 void int_function::debugPrint() const {
1274     fprintf(stderr, "Function debug dump (%p):\n", this);
1275     fprintf(stderr, "  Symbol table names:\n");
1276     for (unsigned i = 0; i < symTabNameVector().size(); i++) {
1277         fprintf(stderr, "    %s\n", symTabNameVector()[i].c_str());
1278     }
1279     fprintf(stderr, "  Demangled names:\n");
1280     for (unsigned j = 0; j < prettyNameVector().size(); j++) {
1281         fprintf(stderr, "    %s\n", prettyNameVector()[j].c_str());
1282     }
1283     fprintf(stderr, "  Typed names:\n");
1284     for (unsigned k = 0; k < typedNameVector().size(); k++) {
1285         fprintf(stderr, "    %s\n", typedNameVector()[k].c_str());
1286     }
1287     fprintf(stderr, "  Address: 0x%lx\n", getAddress());
1288     fprintf(stderr, "  Internal pointer: %p\n", ifunc_);
1289     fprintf(stderr, "  Object: %s (%p), module: %s (%p)\n", 
1290             obj()->fileName().c_str(), 
1291             obj(),
1292             mod()->fileName().c_str(),
1293             mod());
1294     for (set< int_basicBlock * , int_basicBlock::compare >::const_iterator 
1295              cb = blockList.begin();
1296          cb != blockList.end(); 
1297          cb++) 
1298     {
1299         bblInstance* orig = (*cb)->origInstance();
1300         fprintf(stderr, "  Block start 0x%lx, end 0x%lx\n", orig->firstInsnAddr(),
1301                 orig->endAddr());
1302     }
1303 }
1304
1305 // Add to internal
1306 // Add to mapped_object if a "new" name (true return from internal)
1307 void int_function::addSymTabName(const std::string name, bool isPrimary) {
1308     if (ifunc()->addSymTabName(name, isPrimary))
1309         obj()->addFunctionName(this, name, mapped_object::mangledName);
1310 }
1311
1312 void int_function::addPrettyName(const std::string name, bool isPrimary) {
1313     if (ifunc()->addPrettyName(name, isPrimary))
1314         obj()->addFunctionName(this, name, mapped_object::prettyName);
1315 }
1316
1317 void int_function::getStaticCallers(pdvector< int_function * > &callers)
1318 {
1319     pdvector<image_edge *> ib_ins;
1320
1321     if(!ifunc_ || !ifunc_->entryBlock())
1322         return;
1323
1324     Block::edgelist & ins = ifunc_->entryBlock()->sources();
1325     Block::edgelist::iterator eit = ins.begin();
1326     for( ; eit != ins.end(); ++eit) {
1327         if((*eit)->type() == CALL)
1328         {   
1329             vector<Function *> ifuncs;
1330             (*eit)->src()->getFuncs(ifuncs);
1331             vector<Function *>::iterator ifit = ifuncs.begin();
1332             for( ; ifit != ifuncs.end(); ++ifit)
1333             {   
1334                 int_function * f;
1335                 f = obj()->findFunction((image_func*)*ifit);
1336                 
1337                 callers.push_back(f);
1338             }
1339         }
1340     }
1341 }
1342
1343 void int_function::addBBLInstance(bblInstance *instance) {
1344     assert(instance);
1345     blocksByAddr_.insert(instance);
1346 }
1347
1348 void int_function::deleteBBLInstance(bblInstance *instance) {
1349     assert(instance);
1350     blocksByAddr_.remove(instance->firstInsnAddr());
1351 }
1352
1353 image_func *int_function::ifunc() {
1354     return ifunc_;
1355 }
1356
1357 int int_basicBlock_count = 0;
1358
1359 int_basicBlock::int_basicBlock(image_basicBlock *ib, Address baseAddr, int_function *func, int id) :
1360     func_(func),
1361     ib_(ib),
1362     id_(id)
1363 {
1364 #if defined(ROUGH_MEMORY_PROFILE)
1365     int_basicBlock_count++;
1366     if ((int_basicBlock_count % 100) == 0)
1367         fprintf(stderr, "int_basicBlock_count: %d (%d)\n",
1368                 int_basicBlock_count, int_basicBlock_count*sizeof(int_basicBlock));
1369 #endif
1370
1371     bblInstance *inst = new bblInstance(ib->firstInsnOffset() + baseAddr,
1372                                         ib->lastInsnOffset() + baseAddr,
1373                                         ib->endOffset() + baseAddr,
1374                                         this, 
1375                                         0);
1376     instances_.push_back(inst);
1377     assert(func_);
1378     func_->addBBLInstance(inst);
1379 }
1380
1381 int_basicBlock::int_basicBlock(const int_basicBlock *parent, int_function *func,int id) :
1382     func_(func),
1383     ib_(parent->ib_),
1384     id_(id)
1385 {
1386     for (unsigned i = 0; i < parent->instances_.size(); i++) {
1387         bblInstance *bbl = new bblInstance(parent->instances_[i], this);
1388         instances_.push_back(bbl);
1389         func_->addBBLInstance(bbl);
1390     }
1391 }
1392
1393 int_basicBlock::~int_basicBlock() {
1394     // don't kill func_;
1395     // don't kill ib_;
1396     for (unsigned i = 0; i < instances_.size(); i++) {
1397         delete instances_[i];
1398     }
1399 #if defined (cap_use_pdvector)
1400     instances_.zap();
1401 #else
1402     instances_.clear();
1403 #endif
1404 }
1405
1406 bblInstance *int_basicBlock::origInstance() const {
1407     assert(instances_.size());
1408     return instances_[0];
1409 }
1410
1411 bblInstance *int_basicBlock::instVer(unsigned id) const {
1412     if (id >= instances_.size())
1413     {
1414         fprintf(stderr, "ERROR: requesting bblInstance %u, only %d known "
1415                 "for block at 0x%lx %s[%d]\n", id, (int)instances_.size(), 
1416                 instances_[0]->firstInsnAddr(), FILE__,__LINE__);
1417         return instances_[instances_.size()-1];
1418     }
1419     return instances_[id];
1420 }
1421
1422 void int_basicBlock::removeVersion(unsigned id, bool deleteInstance) {
1423     if (id >= instances_.size()) {
1424         fprintf(stderr, "ERROR: deleting bblInstance %u, only %ld known\n",
1425                 id, (long) instances_.size());
1426         return;
1427     }
1428     if (id < (instances_.size() - 1)) {
1429         fprintf(stderr, "ERROR: deleting bblInstance %u, not last\n",
1430                 id);
1431         assert(0);
1432         return;
1433     }
1434     if (deleteInstance) {
1435         bblInstance *inst = instances_[id];
1436         delete inst;
1437     }
1438     instances_.pop_back();
1439 }
1440
1441
1442 const pdvector<bblInstance *> &int_basicBlock::instances() const {
1443     return instances_;
1444 }
1445
1446 int bblInstance_count = 0;
1447
1448 bblInstance::bblInstance(Address start, Address last, Address end, int_basicBlock *parent, int version) : 
1449 #if defined(cap_relocation)
1450     reloc_info(NULL),
1451 #endif
1452     firstInsnAddr_(start),
1453     lastInsnAddr_(last),
1454     blockEndAddr_(end),
1455     block_(parent),
1456     version_(version)
1457 {
1458 #if defined(ROUGH_MEMORY_PROFILE)
1459     bblInstance_count++;
1460     if ((bblInstance_count % 100) == 0)
1461         fprintf(stderr, "bblInstance_count: %d (%d)\n",
1462                 bblInstance_count, bblInstance_count*sizeof(bblInstance));
1463 #endif
1464
1465
1466     // And add to the mapped_object code range
1467     block_->func()->obj()->codeRangesByAddr_.insert(this);
1468 };
1469
1470 bblInstance::bblInstance(int_basicBlock *parent, int version) : 
1471 #if defined(cap_relocation)
1472     reloc_info(NULL),
1473 #endif
1474     firstInsnAddr_(0),
1475     lastInsnAddr_(0),
1476     blockEndAddr_(0),
1477     block_(parent),
1478     version_(version)
1479 {
1480     // And add to the mapped_object code range
1481     //block_->func()->obj()->codeRangesByAddr_.insert(this);
1482 };
1483
1484 bblInstance::bblInstance(const bblInstance *parent, int_basicBlock *block) :
1485 #if defined(cap_relocation)
1486     reloc_info(NULL),
1487 #endif
1488     firstInsnAddr_(parent->firstInsnAddr_),
1489     lastInsnAddr_(parent->lastInsnAddr_),
1490     blockEndAddr_(parent->blockEndAddr_),
1491     block_(block),
1492     version_(parent->version_) {
1493 #if defined(cap_relocation)
1494    if (parent->reloc_info) {
1495       reloc_info = new reloc_info_t(parent->reloc_info, block);
1496    }
1497 #endif
1498
1499     // If the bblInstance is the original version, add to the mapped_object
1500     // code range; if it is the product of relocation, add it to the
1501     // process.
1502     if(version_ == 0)
1503         block_->func()->obj()->codeRangesByAddr_.insert(this);
1504     else
1505         block_->func()->obj()->proc()->addOrigRange(this);
1506 }
1507
1508 bblInstance::~bblInstance() {
1509 #if defined(cap_relocation)
1510    if (reloc_info)
1511       delete reloc_info;
1512 #endif
1513 }
1514
1515 int_basicBlock *bblInstance::block() const {
1516     if ( ! func()->obj()->isExploratoryModeOn() ) {
1517         assert(block_);
1518     }
1519     return block_;
1520 }
1521
1522 void int_basicBlock::setHighLevelBlock(void *newb)
1523 {
1524    highlevel_block = newb;
1525 }
1526
1527 void *int_basicBlock::getHighLevelBlock() const {
1528    return highlevel_block;
1529 }
1530
1531 bool int_basicBlock::containsCall()
1532 {
1533     Block::edgelist & out_edges = llb()->targets();
1534     Block::edgelist::iterator eit = out_edges.begin();
1535     for( ; eit != out_edges.end(); ++eit) {
1536         if ( CALL == (*eit)->type() ) {
1537             return true;
1538         }
1539     }
1540     return false;
1541 }
1542
1543 int_function *bblInstance::func() const {
1544     assert(block_);
1545     return block_->func();
1546 }
1547
1548 AddressSpace *bblInstance::proc() const {
1549     assert(block_);
1550     return block_->func()->proc();
1551 }
1552
1553
1554 static Address relocLookup
1555 (const pdvector<bblInstance::reloc_info_t::relocInsn::Ptr> relocs, Address addr)
1556 {
1557     for (unsigned i = 0; i < relocs.size(); i++) {
1558         if (relocs[i]->origAddr == addr)
1559                 return relocs[i]->relocAddr;
1560         if (relocs[i]->relocAddr == addr)
1561             return relocs[i]->origAddr;
1562     }
1563     return 0;
1564 }
1565
1566 // addr can correspond to the new block or to the "this" block, unless
1567 // neither of the versions is an origInstance, in which case addr should 
1568 // correspond to the "this" block
1569 Address bblInstance::equivAddr(int newVersion, Address addr) const {
1570
1571     Address translAddr = 0;
1572
1573     if (newVersion == version()) {
1574         translAddr = addr;
1575         return translAddr;
1576     }
1577
1578 #if defined(cap_relocation)
1579
1580     // account for possible prior deletion of the int_basicBlock in 
1581     // exploratory mode
1582     if ( NULL == block_ ) {
1583         if ( ! func()->obj()->isExploratoryModeOn() ) {
1584             assert(0);
1585         }
1586         assert(0 == newVersion && "translating to non-zero version after int_basicBlock deletion");
1587
1588         unsigned int iidx=0;
1589         while (iidx < get_relocs().size() && 
1590                addr != get_relocs()[iidx]->origAddr) 
1591            iidx++;
1592         if (iidx < get_relocs().size()) {
1593             translAddr = get_relocs()[iidx]->origAddr;
1594         } else {
1595             mal_printf("%s[%d] WARNING: returning 0 in equivAddr, called on "
1596                     "bblInstance at %lx whose block has been deleted %lx\n", 
1597                     FILE__,__LINE__,firstInsnAddr_);
1598             return 0;
1599         }
1600     }
1601
1602     assert (newVersion < (int)block_->instances().size());
1603
1604     // do the translation
1605     if (0 == version()) {
1606         translAddr = relocLookup(block_->instVer(newVersion)->get_relocs(), 
1607                                  addr);
1608     }
1609     else if (0 == newVersion) {
1610         translAddr = relocLookup(get_relocs(), addr);
1611     } else { // neither version is non-zero, first translate to origInstance, 
1612              // then to the new version instance
1613         translAddr = block()->origInstance()->equivAddr(newVersion, 
1614                                                         equivAddr(0,addr));
1615     }
1616
1617 #endif
1618
1619     if (!translAddr) {
1620         fprintf(stderr,"ERROR: returning 0 in equivAddr, called on bblInstance"
1621                 " at %lx for new version %d in function at %lx %s[%d]\n", 
1622                 firstInsnAddr_, newVersion, 
1623                 block()->func()->getAddress(),FILE__,__LINE__);
1624         return 0;
1625     }
1626     return translAddr;
1627 }
1628
1629 void *bblInstance::getPtrToInstruction(Address addr) const {
1630     if (addr < firstInsnAddr_) return NULL;
1631     if (addr >= blockEndAddr_) return NULL;
1632
1633 #if defined(cap_relocation)
1634     if (version_ > 0) {
1635       // We might be relocated...
1636       if (getGeneratedBlock() != NULL) {
1637         addr -= firstInsnAddr();
1638         return getGeneratedBlock().get_ptr(addr);
1639       }
1640     }
1641 #endif
1642     
1643     return func()->obj()->getPtrToInstruction(addr);
1644
1645 }
1646
1647 void *bblInstance::get_local_ptr() const {
1648 #if defined(cap_relocation)
1649     if (!reloc_info) return NULL; 
1650     return reloc_info->generatedBlock_.start_ptr();
1651 #else
1652     return NULL;
1653 #endif
1654 }
1655
1656 int bblInstance::version() const 
1657 {
1658    return version_;
1659 }
1660
1661 #if defined(cap_relocation)
1662
1663 const void *bblInstance::getPtrToOrigInstruction(Address addr) const {
1664   if (version_ > 0) {
1665     for (unsigned i = 0; i < get_relocs().size(); i++) {
1666       if (get_relocs()[i]->relocAddr == addr) {
1667          return (const void *) get_relocs()[i]->origPtr;
1668       }
1669     }
1670     assert(0);
1671     return NULL;
1672   }
1673
1674   return getPtrToInstruction(addr);
1675 }
1676
1677 unsigned bblInstance::getRelocInsnSize(Address addr) const {
1678   if (version_ > 0) {
1679     for (unsigned i = 0; i < get_relocs().size()-1; i++) {
1680       if (get_relocs()[i]->relocAddr == addr)
1681         return get_relocs()[i+1]->relocAddr - get_relocs()[i]->relocAddr;
1682     }
1683     if (get_relocs()[get_relocs().size()-1]->relocAddr == addr) {
1684       return blockEndAddr_ - get_relocs()[get_relocs().size()-1]->relocAddr;
1685     }
1686     assert(0);
1687     return 0;
1688   }
1689   // ... uhh...
1690   // This needs to get handled by the caller
1691
1692   return 0;
1693 }
1694
1695 void bblInstance::getOrigInstructionInfo(Address addr, const void *&ptr, 
1696                                          Address &origAddr, 
1697                                          unsigned &origSize) const 
1698 {
1699    if (version_ > 0) {
1700       fprintf(stderr, "getPtrToOrigInstruction 0x%lx, version %d\n",
1701               addr, version_);
1702       for (unsigned i = 0; i < get_relocs().size(); i++) {
1703          if (get_relocs()[i]->relocAddr == addr) {
1704             fprintf(stderr, "... returning 0x%lx off entry %d\n",
1705                     get_relocs()[i]->origAddr,i);
1706             ptr = get_relocs()[i]->origPtr;
1707             origAddr = get_relocs()[i]->origAddr;
1708             if (i == (get_relocs().size()-1)) {
1709                origSize = blockEndAddr_ - get_relocs()[i]->relocAddr;
1710             }
1711             else
1712                origSize = get_relocs()[i+1]->relocAddr - get_relocs()[i]->relocAddr;
1713             return;
1714          }
1715       }
1716       assert(0);
1717       return;
1718    }
1719    
1720    // Must be handled by caller
1721    ptr = NULL;
1722    origAddr = 0;
1723    origSize = 0;
1724    return;
1725 }
1726
1727 unsigned &bblInstance::maxSize() {
1728    if (!reloc_info)
1729       reloc_info = new reloc_info_t();
1730    return reloc_info->maxSize_;
1731 }
1732
1733 unsigned &bblInstance::minSize() {
1734    if (!reloc_info)
1735       reloc_info = new reloc_info_t();
1736    return reloc_info->minSize_;
1737 }
1738
1739 bblInstance *&bblInstance::origInstance() {
1740    if (!reloc_info) {
1741       reloc_info = new reloc_info_t();
1742       reloc_info->origInstance_ = block_->origInstance();
1743    }
1744    return reloc_info->origInstance_;
1745 }
1746
1747 pdvector<funcMod *> &bblInstance::appliedMods() {
1748    if (!reloc_info)
1749       reloc_info = new reloc_info_t();
1750    return reloc_info->appliedMods_;
1751 }
1752
1753 codeGen &bblInstance::generatedBlock() {
1754   if (!reloc_info)
1755       reloc_info = new reloc_info_t();
1756   return reloc_info->generatedBlock_;
1757 }
1758
1759 functionReplacement *&bblInstance::jumpToBlock() {
1760   if (!reloc_info)
1761       reloc_info = new reloc_info_t();
1762   return reloc_info->jumpToBlock_;
1763 }
1764
1765 pdvector<bblInstance::reloc_info_t::relocInsn::Ptr> &bblInstance::get_relocs() const {
1766   assert(reloc_info);
1767   return reloc_info->relocs_;
1768 }
1769
1770 pdvector<bblInstance::reloc_info_t::relocInsn::Ptr> &bblInstance::relocs() {
1771   if (!reloc_info)
1772     reloc_info = new reloc_info_t();
1773   return get_relocs();
1774 }
1775
1776 unsigned bblInstance::getMaxSize() const {
1777    if (!reloc_info)
1778       return 0;
1779    return reloc_info->maxSize_;
1780 }
1781
1782 bblInstance *bblInstance::getOrigInstance() const {
1783    if (!reloc_info)
1784       return NULL;
1785    return reloc_info->origInstance_;
1786 }
1787
1788 pdvector<funcMod *> &bblInstance::getAppliedMods() const {
1789    assert(reloc_info);
1790    return reloc_info->appliedMods_;
1791 }
1792
1793 codeGen &bblInstance::getGeneratedBlock() const {
1794    assert(reloc_info);
1795    return reloc_info->generatedBlock_;
1796 }
1797
1798 functionReplacement *bblInstance::getJumpToBlock() const {
1799    if (!reloc_info)
1800       return NULL;
1801    return reloc_info->jumpToBlock_;
1802 }
1803
1804
1805 bblInstance::reloc_info_t::reloc_info_t() : 
1806    maxSize_(0), 
1807    minSize_(0), 
1808    origInstance_(NULL), 
1809    jumpToBlock_(NULL),
1810    funcRelocBase_(0)
1811 {};
1812
1813 bblInstance::reloc_info_t::reloc_info_t(reloc_info_t *parent, 
1814                                         int_basicBlock *block)  :
1815    maxSize_(0),
1816    minSize_(0),
1817    funcRelocBase_(0)
1818 {
1819    if (parent->origInstance_)
1820       origInstance_ = block->instVer(parent->origInstance_->version());
1821    else
1822       origInstance_ = NULL;
1823
1824    if (parent->jumpToBlock_)
1825        jumpToBlock_ = new functionReplacement(*(parent->jumpToBlock_));
1826    else
1827        jumpToBlock_ = NULL;
1828
1829    for (unsigned i = 0; i < parent->relocs_.size(); i++) {
1830      relocs_.push_back( parent->relocs_[i] );
1831    }
1832
1833 }
1834
1835 bblInstance::reloc_info_t::~reloc_info_t() {
1836   // XXX this wasn't safe, as copies of bblInstances
1837   //     reference the same relocInsns.
1838   //     relocs_ now holds shared_ptrs
1839   //for (unsigned i = 0; i < relocs_.size(); i++) {
1840   //  delete relocs_[i];
1841   //}
1842
1843 #if defined (cap_use_pdvector)
1844   relocs_.zap();
1845 #else
1846   relocs_.clear();
1847 #endif
1848
1849    // appliedMods is deleted by the function....
1850    // jumpToBlock is deleted by the process....
1851 };
1852
1853 #endif
1854
1855 int_basicBlock *functionReplacement::source() { 
1856    return sourceBlock_; 
1857 }
1858
1859 int_basicBlock *functionReplacement::target() { 
1860    return targetBlock_; 
1861 }
1862
1863 unsigned functionReplacement::sourceVersion() { 
1864    return sourceVersion_; 
1865 }
1866
1867 unsigned functionReplacement::targetVersion() { 
1868    return targetVersion_; 
1869 }
1870
1871
1872 // Dig down to the low-level block of b, find the low-level functions
1873 // that share it, and map up to int-level functions and add them
1874 // to the funcs list.
1875 bool int_function::getSharingFuncs(int_basicBlock *b,
1876                                    pdvector< int_function *> & funcs)
1877 {
1878     bool ret = false;
1879     if(!b->hasSharedBase())
1880         return ret;
1881
1882     vector<Function *> lfuncs;
1883     b->llb()->getFuncs(lfuncs);
1884     vector<Function *>::iterator fit = lfuncs.begin();
1885     for( ; fit != lfuncs.end(); ++fit) {
1886         image_func *ll_func = static_cast<image_func*>(*fit);
1887         int_function *hl_func = obj()->findFunction(ll_func);
1888         assert(hl_func);
1889
1890         if (hl_func == this) continue;
1891
1892         // Let's see if we've already got it...
1893         bool found = false;
1894         for (unsigned j = 0; j < funcs.size(); j++) {
1895             if (funcs[j] == hl_func) {
1896                 found = true;
1897                 break;
1898             }
1899         }
1900         if (!found) {
1901             ret = true;
1902             funcs.push_back(hl_func);
1903         }
1904     }
1905
1906     return ret;
1907 }
1908
1909 // Find overlapping functions via checking all basic blocks. We might be
1910 // able to check only exit points; but we definitely need to check _all_
1911 // exits so for now we're checking everything.
1912
1913 bool int_function::getOverlappingFuncs(pdvector<int_function *> &funcs) {
1914     bool ret = false;
1915
1916     funcs.clear();
1917
1918     // Create the block list.
1919     blocks();
1920
1921     set< int_basicBlock* , int_basicBlock::compare >::iterator bIter;
1922     for (bIter = blockList.begin(); 
1923          bIter != blockList.end(); 
1924          bIter++) {
1925         if (getSharingFuncs(*bIter,funcs))
1926             ret = true;
1927     }
1928
1929     return ret;
1930 }
1931
1932 Address int_function::get_address() const 
1933 {
1934 #if !defined(cap_relocation)
1935    return getAddress();
1936 #else
1937    if (!entryPoints_.size())
1938       return getAddress();
1939    
1940    instPoint *entryPoint = entryPoints_[0];
1941    int_basicBlock *block = entryPoint->block();
1942    bblInstance *inst = block->instVer(installedVersion_);
1943    return inst->firstInsnAddr();
1944 #endif 
1945 }
1946
1947 unsigned int_function::get_size() const 
1948 {
1949    assert(0);
1950    return 0x0;
1951 }
1952
1953 std::string int_function::get_name() const
1954 {
1955    return symTabName();
1956 }
1957
1958 bblInstance * bblInstance::getTargetBBL() {
1959     // Check to see if we need to fix up the target....
1960     pdvector<int_basicBlock *> targets;
1961     block_->getTargets(targets);
1962     
1963     // We have edge types on the internal data, so we drop down and get that. 
1964     // We want to find the "branch taken" edge and override the destination
1965     // address for that guy.
1966     Block::edgelist & out_edges = block_->llb()->targets();
1967     
1968     // May be greater; we add "extra" edges for things like function calls, etc.
1969     assert (out_edges.size() >= targets.size());
1970    
1971     Block::edgelist::iterator eit = out_edges.begin();
1972     for( ; eit != out_edges.end(); ++eit) {
1973         EdgeTypeEnum edgeType = (*eit)->type();
1974         if ((edgeType == COND_TAKEN) ||
1975             (edgeType == DIRECT)) {
1976             // Got the right edge... now find the matching high-level
1977             // basic block
1978             image_basicBlock *llTarget = (image_basicBlock*)(*eit)->trg();
1979             int_basicBlock *hlTarget = NULL;
1980             for (unsigned t_iter = 0; t_iter < targets.size(); t_iter++) {
1981                 // Should be the same index, but this is a small set...
1982                 if (targets[t_iter]->llb() == llTarget) {
1983                     hlTarget = targets[t_iter];
1984                     break;
1985                 }
1986             }
1987             if (hlTarget == NULL) {
1988                 fprintf(stderr, "targets:%d out_edges:%d src:0x%lx->0x%lx trg:0x%lx->0x%lx\n", 
1989                         (int)targets.size(), (int)out_edges.size(), (*eit)->src()->start(), (*eit)->src()->end(), (*eit)->trg()->start(), (*eit)->trg()->end());
1990             }
1991
1992             assert(hlTarget != NULL);
1993             return hlTarget->instVer(version_);
1994         }
1995     }
1996     return NULL;
1997 }
1998
1999 bblInstance * bblInstance::getFallthroughBBL() {
2000
2001     if (func()->obj()->isExploratoryModeOn()) {
2002         // if this bblInstance has been invalidated, see if block splitting has
2003         // happened, in which case, get the latter of the two blocks and 
2004         // return its fallthrough block 
2005         if ( block_->instances().size() <= (unsigned) version_ ||
2006              this != block_->instVer(version_) ) 
2007         {
2008             bblInstance *origInst = func()->findBlockInstanceByAddr
2009                 (get_relocs().back()->origAddr);
2010             return origInst->getFallthroughBBL();
2011         }
2012     }
2013
2014     // Check to see if we need to fix up the target....
2015     pdvector<int_basicBlock *> targets;
2016     block_->getTargets(targets);
2017     
2018     // We have edge types on the internal data, so we drop down and get that. 
2019     // We want to find the "branch taken" edge and override the destination
2020     // address for that guy.
2021     Block::edgelist & out_edges = block_->llb()->targets();
2022     
2023     // May be greater; we add "extra" edges for things like function calls, etc.
2024     assert (out_edges.size() >= targets.size());
2025
2026     NoSinkPredicate nsp;
2027     
2028     Block::edgelist::iterator eit = out_edges.begin(&nsp);
2029     for( ; eit != out_edges.end(); ++eit) {
2030         EdgeTypeEnum edgeType = (*eit)->type();
2031         if ((edgeType == COND_NOT_TAKEN) ||
2032             (edgeType == FALLTHROUGH) ||
2033             (edgeType == CALL_FT)) {
2034             // Got the right edge... now find the matching high-level
2035             // basic block
2036             image_basicBlock *llTarget = (image_basicBlock*)(*eit)->trg();
2037             int_basicBlock *hlTarget = NULL;
2038             for (unsigned t_iter = 0; t_iter < targets.size(); t_iter++) {
2039                 // Should be the same index, but this is a small set...
2040                 if (targets[t_iter]->llb() == llTarget) {
2041                     hlTarget = targets[t_iter];
2042                     break;
2043                 }
2044             }
2045             assert(hlTarget != NULL);
2046             
2047             return hlTarget->instVer(version_);
2048         }
2049     }
2050     return NULL;
2051 }
2052
2053
2054 bool int_function::performInstrumentation(bool stopOnFailure,
2055                                           pdvector<instPoint *> &failedInstPoints) {
2056
2057     // We have the following possible side-effects:
2058     // 
2059     // 1) Generating an instPoint (e.g., creating the multiTramp and its code)
2060     //    may determine the function is too small to fit the instrumentation,
2061     //    requiring relocation.
2062     // 2) Instrumenting a shared block may also trigger relocation as a 
2063     //    mechanism to unwind the sharing. 
2064     //
2065     // 3) Relocation will add additional instPoint instances.
2066     //
2067
2068     // Thus, we have the following order of events:
2069     //
2070     // 1) Generate all instPoints that actually have instrumentation. 
2071     //    This will identify whether the function requires relocation.
2072     // 2) If relocation is necessary:
2073     // 2a) Generate relocation; this will create the function copy and update 
2074     //     function-local data structures.
2075     // 2b) Install relocation; this will update process-level data structures and
2076     //     copy the relocated function into the address space.
2077     // 2c) Generate instPoints again to handle any new instPointInstances that
2078     //     have showed up. This should _not_ result in required relocation.
2079     // 3) Install instPoints
2080     // 4) Link (relocated copy of the function) and instPoints.
2081
2082     // Assumptions: 
2083     // 1) calling generate/install/link on "empty" instPoints has no effect.
2084     // 2) Generate/install/link operations are idempotent.
2085
2086     // Let's avoid a lot of work and collect up all instPoints that have
2087     // something interesting going on; that is, that have instrumentation
2088     // added since the last time something came up. 
2089
2090 #if defined(arch_x86_64)
2091   if(proc()->getAddressWidth() == 8)
2092   {
2093     ia32_set_mode_64(true);
2094   }
2095   else
2096   {
2097     ia32_set_mode_64(false);
2098   }
2099 #endif  
2100
2101   if (isBeingInstrumented_) return false;
2102   isBeingInstrumented_ = true;
2103
2104     std::set<instPoint *> newInstrumentation;
2105     std::set<instPoint *> anyInstrumentation;
2106
2107     getNewInstrumentation(newInstrumentation);
2108     getAnyInstrumentation(anyInstrumentation);
2109
2110     // Quickie correctness assert: newInstrumentation \subseteq anyInstrumentation
2111     assert(newInstrumentation.size() <= anyInstrumentation.size()); 
2112
2113     bool relocationRequired = false;
2114
2115     // Step 1: Generate all new instrumentation
2116     generateInstrumentation(newInstrumentation, failedInstPoints, relocationRequired); 
2117     
2118     if (failedInstPoints.size() && stopOnFailure) {
2119       isBeingInstrumented_ = false;
2120       return false;
2121     }
2122
2123 #if defined(cap_relocation)
2124     // Step 2: is relocation necessary?
2125     if (relocationRequired) {
2126         // Yar.
2127         // This will calculate the sizes required for our basic blocks.
2128         expandForInstrumentation();
2129         
2130         // And keep a list of other functions that need relocation due to
2131         // sharing.
2132         pdvector<int_function *> need_reloc;
2133
2134         // Generate the relocated copy of the function.
2135         relocationGenerate(enlargeMods(), 0, need_reloc);
2136         
2137         // Install the relocated copy of the function.
2138         relocationInstall();
2139
2140         // Aaaand link it. 
2141         pdvector<codeRange *> overwritten_objs;
2142         relocationLink(overwritten_objs);
2143
2144         // We've added a new version of the function; therefore, we need
2145         // to update _everything_ that's been instrumented. 
2146         // We do this in two ways. First, we call generate on all
2147         // instPoints to get them in the right place.
2148         // Second, we replace newInstrumentation with anyInstrumentation,
2149         // then call install/link as normal.
2150
2151         // Clear the failedInstPoints vector first; we'll re-generate
2152         // it in any case.
2153         failedInstPoints.clear();
2154         relocationRequired = false;
2155
2156         // Update instPoint instances to include the new function
2157         for (std::set<instPoint*>::iterator iter = anyInstrumentation.begin();
2158              iter != anyInstrumentation.end(); 
2159              iter++) 
2160         {
2161             (*iter)->updateInstancesBatch();
2162         }
2163         // We _explicitly_ don't call the corresponding updateInstancesFinalize,
2164         // as the only purpose of that function is to regenerate instrumentation;
2165         // we do that explicitly below.
2166
2167         generateInstrumentation(anyInstrumentation,
2168                                 failedInstPoints,
2169                                 relocationRequired);
2170         // I'm commenting this out; I originally thought it would be the case,
2171         // but on further thought the original instPoint will _still_ be asking
2172         // for relocation. 
2173         //assert(relocationRequired == false);
2174
2175         newInstrumentation = anyInstrumentation;
2176
2177         // If there are any other functions that we need to relocate
2178         // due to this relocation, handle it now. We don't care if they
2179         // fail to install instrumentation though.
2180         pdvector<instPoint *> dontcare;
2181         for (unsigned i = 0; i < need_reloc.size(); i++) {
2182           need_reloc[i]->performInstrumentation(false, dontcare);
2183         }
2184
2185         mal_printf("%s[%d] relocating function at %lx\n", 
2186                    __FILE__,__LINE__,getAddress());
2187     }
2188 #endif
2189
2190     // Okay, back to what we were doing...
2191     
2192     installInstrumentation(newInstrumentation,
2193                            failedInstPoints);
2194     linkInstrumentation(newInstrumentation,
2195                         failedInstPoints);
2196
2197     if (obj()->isSharedLib()) {
2198         //printf("===> Instrumenting function in shared library: %s [%s]\n",
2199                 //prettyName().c_str(), obj()->fileName().c_str());
2200         obj()->setDirty();
2201     }
2202
2203     isBeingInstrumented_ = false;
2204     return (failedInstPoints.size() == 0);
2205 }
2206
2207 void int_function::getNewInstrumentation(std::set<instPoint *> &ret) {
2208     for (unsigned i = 0; i < entryPoints_.size(); i++) {
2209         if (entryPoints_[i]->hasNewInstrumentation()) {
2210             ret.insert(entryPoints_[i]);
2211         }
2212     }
2213     for (unsigned i = 0; i < exitPoints_.size(); i++) {
2214         if (exitPoints_[i]->hasNewInstrumentation()) {
2215             ret.insert(exitPoints_[i]);
2216         }
2217     }
2218     for (unsigned i = 0; i < callPoints_.size(); i++) {
2219         if (callPoints_[i]->hasNewInstrumentation()) {
2220             ret.insert(callPoints_[i]);
2221         }
2222     }
2223     for (unsigned i = 0; i < arbitraryPoints_.size(); i++) {
2224         if (arbitraryPoints_[i]->hasNewInstrumentation()) {
2225             ret.insert(arbitraryPoints_[i]);
2226         }
2227     }
2228     std::set< instPoint* >::iterator pIter = unresolvedPoints_.begin();
2229     while ( pIter != unresolvedPoints_.end() ) {
2230         if ( (*pIter)->hasNewInstrumentation() ) {
2231             ret.insert( *pIter );
2232         }
2233         pIter++;
2234     }
2235     pIter = abruptEnds_.begin();
2236     while ( pIter != abruptEnds_.end() ) {
2237         if ( (*pIter)->hasNewInstrumentation() ) {
2238             ret.insert( *pIter );
2239         }
2240         pIter++;
2241     }
2242 }
2243
2244 void int_function::getAnyInstrumentation(std::set<instPoint *> &ret) {
2245     for (unsigned i = 0; i < entryPoints_.size(); i++) {
2246         if (entryPoints_[i]->hasAnyInstrumentation()) {
2247             ret.insert(entryPoints_[i]);
2248         }
2249     }
2250     for (unsigned i = 0; i < exitPoints_.size(); i++) {
2251         if (exitPoints_[i]->hasAnyInstrumentation()) {
2252             ret.insert(exitPoints_[i]);
2253         }
2254     }
2255     for (unsigned i = 0; i < callPoints_.size(); i++) {
2256         if (callPoints_[i]->hasAnyInstrumentation()) {
2257             ret.insert(callPoints_[i]);
2258         }
2259     }
2260     for (unsigned i = 0; i < arbitraryPoints_.size(); i++) {
2261         if (arbitraryPoints_[i]->hasAnyInstrumentation()) {
2262             ret.insert(arbitraryPoints_[i]);
2263         }
2264     }
2265     std::set< instPoint* >::iterator pIter = unresolvedPoints_.begin();
2266     while ( pIter != unresolvedPoints_.end() ) {
2267         if ( (*pIter)->hasAnyInstrumentation() ) {
2268             ret.insert( *pIter );
2269         }
2270         pIter++;
2271     }
2272     pIter = abruptEnds_.begin();
2273     while ( pIter != abruptEnds_.end() ) {
2274         if ( (*pIter)->hasAnyInstrumentation() ) {
2275             ret.insert( *pIter );
2276         }
2277         pIter++;
2278     }
2279 }
2280
2281 void int_function::generateInstrumentation(std::set<instPoint *> &input,
2282                                            pdvector<instPoint *> &failed,
2283                                            bool &relocationRequired) {
2284     for (std::set<instPoint*>::iterator iter = input.begin();
2285          iter != input.end(); 
2286          iter++) 
2287     {
2288         switch ((*iter)->generateInst()) {
2289         case instPoint::tryRelocation:
2290             relocationRequired = true;
2291             break;
2292         case instPoint::generateSucceeded:
2293             break;
2294         case instPoint::generateFailed:
2295             failed.push_back(*iter);
2296             break;
2297         default:
2298             assert(0);
2299             break;
2300         }
2301     }
2302 }
2303
2304 void int_function::installInstrumentation(std::set<instPoint *> &input,
2305                                           pdvector<instPoint *> &failed) {
2306     for (std::set<instPoint*>::iterator iter = input.begin();
2307          iter != input.end(); 
2308          iter++) 
2309     {
2310         switch ((*iter)->installInst()) {
2311         case instPoint::wasntGenerated:
2312             break;
2313         case instPoint::installSucceeded:
2314             break;
2315         case instPoint::installFailed:
2316             failed.push_back(*iter);
2317             break;
2318         default:
2319             assert(0);
2320             break;
2321         }
2322     }
2323 }
2324
2325
2326 void int_function::linkInstrumentation(std::set<instPoint *> &input,
2327                                        pdvector<instPoint *> &failed) {
2328     for (std::set<instPoint*>::iterator iter = input.begin();
2329          iter != input.end(); 
2330          iter++) 
2331     {
2332         switch ((*iter)->linkInst()) {
2333         case instPoint::wasntInstalled:
2334             break;
2335         case instPoint::linkSucceeded:
2336             break;
2337         case instPoint::linkFailed:
2338             failed.push_back(*iter);
2339             break;
2340         default:
2341             assert(0);
2342             break;
2343         }
2344     }
2345 }
2346
2347
2348 Offset int_function::addrToOffset(const Address addr) const { 
2349     return addr - getAddress() + ifunc_->getOffset(); 
2350 }
2351
2352 const pdvector< int_parRegion* > &int_function::parRegions()
2353 {
2354   if (parallelRegions_.size() > 0)
2355     return parallelRegions_;
2356
2357   for (unsigned int i = 0; i < ifunc_->parRegions().size(); i++)
2358     {
2359       image_parRegion * imPR = ifunc_->parRegions()[i];
2360       //int_parRegion * iPR = new int_parRegion(imPR, baseAddr, this); 
2361       int_parRegion * iPR = new int_parRegion(imPR, addr_, this); 
2362       parallelRegions_.push_back(iPR);
2363     }
2364   return parallelRegions_;
2365 }
2366
2367 #if defined(cap_instruction_api) 
2368 void bblInstance::getInsnInstances(std::vector<std::pair<InstructionAPI::Instruction::Ptr, Address> >&instances) const {
2369   instances.clear();
2370   block()->llb()->getInsnInstances(instances);
2371   for (unsigned i = 0; i < instances.size(); ++i) {
2372     instances[i].second += firstInsnAddr_ - block()->llb()->start();
2373   }
2374 }
2375 #endif
2376 #if 0
// NOTE: this definition is compiled out by the surrounding #if 0.
// It maps an image-level block to an int-level block by translating the
// image block's id through blockIDmap and indexing blockList with the result.
// Elsewhere in this file blockList is iterated as a std::set, which has no
// operator[] -- presumably the reason this was disabled; confirm before
// re-enabling.
int_basicBlock *int_function::findBlockByImage(image_basicBlock *block) {
  unsigned img_id = block->id();
  unsigned int_id = blockIDmap[img_id];
  return blockList[int_id];
}
2382 #endif
2383
2384
2385 /* removes all function blocks in the specified range
2386  */
2387 bool int_function::removeFunctionSubRange(
2388                    Address startAddr, 
2389                    Address endAddr, 
2390                    std::vector<Address> &deadBlockAddrs,
2391                    int_basicBlock *&entryBlock)
2392 {
2393     std::vector<int_basicBlock *> deadBlocks;
2394     std::vector<ParseAPI::Block *> papiDeadBlocks;
2395
2396     findBlocksByRange(deadBlocks,startAddr,endAddr);
2397
2398     // warning if blocks are instrumented
2399     vector<int_basicBlock *>::iterator biter = deadBlocks.begin();
2400     for (; biter != deadBlocks.end(); biter++) {
2401         assert( (*biter)->func() == this );
2402         codeRange* range = proc()->findModByAddr
2403             ((*biter)->origInstance()->firstInsnAddr());
2404         if (range) {
2405             fprintf(stderr,"WARNING: mod range %lx %lx for purged block "
2406                     "%lx %lx %s[%d]\n", range->get_address(), 
2407                     range->get_address()+range->get_size(),
2408                     (*biter)->origInstance()->firstInsnAddr(),
2409                     (*biter)->origInstance()->endAddr(),FILE__,__LINE__);
2410             proc()->removeModifiedRange(range);
2411             return false;
2412         }
2413         deadBlockAddrs.push_back((*biter)->origInstance()->firstInsnAddr());
2414         papiDeadBlocks.push_back((*biter)->llb());
2415     }
2416     
2417     // set new entry point 
2418     setNewEntryPoint( entryBlock );
2419
2420     // remove dead image_basicBlocks and int_basicBlocks
2421     ifunc()->deleteBlocks( papiDeadBlocks, entryBlock->llb() );
2422     for (biter = deadBlocks.begin(); biter != deadBlocks.end(); biter++) {
2423         deleteBlock(*biter);
2424     }
2425
2426     return true;
2427 }
2428
2429