Fix for (1) power deadlock problem due to incorrect lock implementation. We now use...
1 /*
2  * Copyright (c) 1996-2004 Barton P. Miller
3  * 
4  * We provide the Paradyn Parallel Performance Tools (below
5  * described as "Paradyn") on an AS IS basis, and do not warrant its
6  * validity or performance.  We reserve the right to update, modify,
7  * or discontinue this software at any time.  We shall have no
8  * obligation to supply such updates or modifications or any other
9  * form of support to you.
10  * 
11  * This license is for research uses.  For such uses, there is no
12  * charge. We define "research use" to mean you may freely use it
13  * inside your organization for whatever purposes you see fit. But you
14  * may not re-distribute Paradyn or parts of Paradyn, in any form
15  * source or binary (including derivatives), electronic or otherwise,
16  * to any other organization or entity without our permission.
17  * 
18  * (for other uses, please contact us at paradyn@cs.wisc.edu)
19  * 
20  * All warranties, including without limitation, any warranty of
21  * merchantability or fitness for a particular purpose, are hereby
22  * excluded.
23  * 
24  * By your use of Paradyn, you understand and agree that we (or any
25  * other person or entity with proprietary rights in Paradyn) are
26  * under no obligation to provide either maintenance services,
27  * update services, notices of latent defects, or correction of
28  * defects for Paradyn.
29  * 
30  * Even if advised of the possibility of such damages, under no
31  * circumstances shall we (or any other person or entity with
32  * proprietary rights in the software licensed hereunder) be liable
33  * to you or any third party for direct, indirect, or consequential
34  * damages of any character regardless of type of action, including,
35  * without limitation, loss of profits, loss of use, loss of good
36  * will, or computer failure or malfunction.  You agree to indemnify
37  * us (and any other person or entity with proprietary rights in the
38  * software licensed hereunder) for any and all liability it may
39  * incur to third parties resulting from your use of Paradyn.
40  */
41
42 /*
43  * $Id: image-flowGraph.C,v 1.54 2008/10/28 18:42:46 bernat Exp $
44  */
45
46 #include <stdio.h>
47 #include <stdlib.h>
48 #include <string.h>
49 #include <assert.h>
50
51 #include "dyninstAPI/src/symtab.h"
52 #include "dyninstAPI/src/arch.h"
53 #include "dyninstAPI/src/instPoint.h"
54 #include "symtabAPI/h/Symtab.h"
55
56 #include "dyninstAPI/src/parRegion.h"
57
58 #include <fstream>
59 #include "dyninstAPI/src/util.h"
60 #include "dyninstAPI/src/debug.h"
61
62 #include "dyninstAPI/src/InstrucIter.h"
63
64 #include "symtabAPI/h/LineInformation.h"
65 #include "dyninstAPI/h/BPatch_flowGraph.h"
66
67 #if defined(TIMED_PARSE)
68 #include <sys/time.h>
69 #endif
70
71 using namespace Dyninst;
72
73 #if defined (cap_use_pdvector)
74 int addrfunccmp( image_func*& pdf1, image_func*& pdf2 )
75 {
76     if ( pdf1->getOffset() > pdf2->getOffset() )
77         return 1;
78     if ( pdf1->getOffset() < pdf2->getOffset() )
79         return -1;
80     return 0;
81 }
82 #else
83 //stl needs strict weak ordering (true if arg 1 is strictly less than arg 2)
84 bool addrfunccmp( image_func* pdf1, image_func* pdf2 )
85 {
86    return pdf1->getOffset() < pdf2->getOffset();
87 }
95 #endif
96
97
98 //analyzeImage() constructs a flow graph of basic blocks in the image and
99 //assigns them to functions. It follows calls to discover new functions,
100 //as well as using heuristics to search for functions within gaps between
101 //basic blocks.
102 //
103 //Note: basic blocks may be shared between several functions
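//
// A rough sketch of the driver logic implemented below (illustrative summary
// only; the stripped-binary gap-parsing pass is currently compiled out):
//
//   assert(parseState_ < analyzed);
//   parseState_ = analyzing;
//   for each f in symtabCandidateFuncs:        // functions reported by the symbol table
//       if (!f->parsed() && f->parse())
//           enterFunctionInTables(f);
//   for each (block, func) in reparse_shared:  // finish `shared code' functions
//       func->parseSharedBlocks(block);
//   // (Solaris/AIX only) match outlined OpenMP regions with their parent functions
//   for each f in everyUniqueFunction:
//       f->checkCallPoints();                  // bind intra-module call points
//   parseState_ = analyzed;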
104 bool image::analyzeImage()
105 {
106 #if defined(TIMED_PARSE)
107   struct timeval starttime;
108   gettimeofday(&starttime, NULL);
109 #endif
110
111   stats_parse.startTimer(PARSE_ANALYZE_TIMER);
112
113 #if defined(arch_x86_64)
114     ia32_set_mode_64(getObject()->getAddressWidth() == 8);
115 #endif
116   
117   image_func *pdf;
118   pdmodule *mod = NULL;
119
120   assert(parseState_ < analyzed);
121   // Prevent recursion: with this set we can call findFoo as often as we want
122   parseState_ = analyzing;
123
124   if (parseState_ < symtab) {
125     fprintf(stderr, "Error: attempt to analyze before function lists built\n");
126     stats_parse.stopTimer(PARSE_ANALYZE_TIMER);
127     return true;
128   }
129   
130     // Parse the functions reported in the symbol table
131     for(unsigned i=0; i<symtabCandidateFuncs.size(); ++i) {
132         pdf = symtabCandidateFuncs[i];
133         mod = pdf->pdmod();
134
135         assert(pdf); assert(mod);
136
137         // Check whether recursive traversal has dealt with a function
138         // before proceeding to parse it
139         if(!pdf->parsed()) {
140             parsing_printf("[%s:%u] calling parse() at 0x%lx (ptr %p)\n",
141                 FILE__,__LINE__,pdf->getOffset(),pdf);
142     
143             if(pdf->parse()) {
144                 enterFunctionInTables(pdf);
145             } else {
146                 parsing_printf("[%s:%u] symtab-defined function %s at 0x%lx "
147                                "failed to parse\n",FILE__,__LINE__,
148                     pdf->symTabName().c_str(),pdf->getOffset());
149
150                 /** FIXME removing symtab-defined functions is currently unsafe,
151                     as there is no mechanism to remove the symbols (and thus
152                     their up-pointers) through the SymtabAPI. See bug 906.
153                     For the time being, leaving these crufty functions around.
154                 **/
155                 // symtab functions need to be removed from the list
156                 // of known functions by address
157                 //funcsByEntryAddr.undef(pdf->getOffset());
158                 //delete pdf;
159             }
160         }
161     }
162
163     /* Some functions that built their block lists through the
164        `shared code' mechanism may be incomplete. Finish them. */
165     parsing_printf("[%s:%u] Completing `shared code' parsing: %u entries\n",
166         FILE__,__LINE__,reparse_shared.size());
167     for(unsigned i=0;i<reparse_shared.size();++i) {
168         pair<image_basicBlock *,image_func*> & p = reparse_shared[i];
169         (p.second)->parseSharedBlocks(p.first);
170     }
171
172   /* 
173      OPENMP PARSING CODE
174
175      For starters, we have all functions with some sort of identifier in
176      the function name ('@' for Power) designating an outlined OpenMP
177      function.  These are contained in the 'parallelRegions' variable.
178      We then want to find the parent function for each of these regions,
179      i.e. the function that sets up the parameters for a given OpenMP
180      function.  We need this parent function to gather information about
181      what kind of directive is associated with this outlined function and
182      the clauses associated with that function.
183   */
184
185 #if defined(os_solaris) || defined(os_aix)
186   image_parRegion *parReg;
187   int currentSectionNum = 0;
188
189   // Most of the time there will be no parallel regions in an image, so nothing will happen.
190   // If there is a parallel region we examine it individually 
191   for (unsigned i = 0; i < parallelRegions.size(); i++) 
192     {
193       parReg = parallelRegions[i];
194       if (parReg == NULL)
195         continue;
196       else
197         {
198           // Every parallel region has the image_func that contains the
199           //   region associated with it 
200           image_func * imf = parReg->getAssociatedFunc();
201           
202           // Returns pointers to all potential image_funcs that correspond
203           //   to what could be the parent OpenMP function for the region 
204           const pdvector<image_func *> *prettyNames = 
205             findFuncVectorByPretty(imf->calcParentFunc(imf, parallelRegions));
206           
207           // There may be more than one function (or none) with that name; we take the first.
208           // The call below gathers all the information about the parallel region by
209           // examining the parallel region's parent function.
210           if (prettyNames->size() > 0)
211             imf->parseOMP(parReg, (*prettyNames)[0], currentSectionNum);
212           else
213             continue;
214         }
215     }
216     
217   /**************************/
218   /* END OpenMP Parsing Code */
219   /**************************/
220   
221 #endif 
222
223 #if 0 // stripped binary parsing in its current form is dangerous
224 #if defined(cap_stripped_binaries)
225   if (parseGaps_) {
226      mod = getOrCreateModule("DEFAULT_MODULE", linkedFile->imageOffset());
227     // Sort functions to find gaps
228     VECTOR_SORT(everyUniqueFunction, addrfunccmp);
229
230     Address gapStart = linkedFile->imageOffset();
231     Address gapEnd;
232     int funcIdx = 0; // index into everyUniqueFunction (which may be empty)
233     do {
234         /* The everyUniqueFunction vector can be updated in the for
235            loop.  If functions are discovered preceding the one
236            pointed to by the current 'funcIdx' index, we need to
237            advance funcIdx and gapStart to the correct position */
238         /* (It would be better to use a data structure with an iterator
239            that doesn't break on inserts, such as an ordered set) */
240        while(funcIdx < (int) everyUniqueFunction.size() && 
241              everyUniqueFunction[funcIdx]->getEndOffset() < gapStart)
242           {
243              /*parsing_printf("[%s:%u] advancing gap index (gapstart: 0x%lx, "
244                "offset: 0x%lx)\n", FILE__,__LINE__,
245                gapStart,everyUniqueFunction[funcIdx]->getOffset());*/
246              funcIdx++;
247           }
248        if (funcIdx > 0 && funcIdx < (int) everyUniqueFunction.size() 
249            && gapStart < everyUniqueFunction[funcIdx]->getEndOffset()) {
250           gapStart = everyUniqueFunction[funcIdx]->getEndOffset();
251        }
252        
253        // set gapEnd to start of next function or end of code section, 
254        // on 1st iter, set gapEnd to start of 1st function or end of code section
255        if (funcIdx == 0 && everyUniqueFunction.size() > 0) {
256           gapEnd = everyUniqueFunction[0]->getOffset();
257        }
258        else if(funcIdx+1 < (int)everyUniqueFunction.size()) {
259           gapEnd = everyUniqueFunction[funcIdx + 1]->getOffset();
260        } 
261        // if there is no next function, set gapEnd to end of codeSection
262        else if (funcIdx+1 == (int) everyUniqueFunction.size() || funcIdx == 0) {
263           gapEnd = linkedFile->imageOffset() + linkedFile->imageLength();
264        }
265        else {  
266           break; // advanced gap past the end of the code section
267        }
268        
269        int gap = gapEnd - gapStart;
270        
271        parsing_printf("[%s:%u] scanning for prologues in range 0x%lx-0x%lx\n",
272                       FILE__,__LINE__,gapStart, gapEnd);
273        
274        //gap should be big enough to accommodate a function prologue
275        if(gap >= 5) {
276           Address pos = gapStart;
277           while( pos < gapEnd && isCode( pos ) )
278              {
279                 const unsigned char* instPtr;
280                 instPtr = (const unsigned char *)getPtrToInstruction( pos );
281                 instruction insn;
282                 insn.setInstruction( instPtr, pos );
283                 
284                 if( isFunctionPrologue(insn) && !funcsByEntryAddr.defines(pos))
285                    {
286                       char name[32];
287                       snprintf( name, 32, "gap_%lx", pos );
288                       pdf = new image_func( name, pos, UINT_MAX, mod, this);
289                       
290                       if(pdf->parse()) {
291                          pdf->addSymTabName(name); // newly added
292                          pdf->addPrettyName( name );
293                          
294                          // Update size
295                          // TODO FIXME: should update _all_ symbol sizes....
296                          pdf->getSymtabFunction()->getFirstSymbol()->setSize(pdf->get_size());
297
298                          enterFunctionInTables(pdf, false);
299                         
300                         // FIXME this code has not been updated to the reality
301                         // of RT parsing at this point, and will not work
302                         // or even compile  
303
304                          // If any calls were discovered, adjust our
305                          // position in the function vector accordingly
306                          if(callTargets.size() > 0) {            
307                             while( callTargets.size() > 0 )
308                                {   
309                                   /* parse static call targets */
310                                   parseStaticCallTargets( callTargets, 
311                                                           new_targets, preParseStubs );
312                                   callTargets.clear(); 
313                                   
314                                   VECTOR_APPEND(callTargets,new_targets);
315                                   new_targets.clear();
316                                }
317                             // Maintain sort
318                             VECTOR_SORT(everyUniqueFunction, addrfunccmp);
319                             break;  // Return to for loop
320                          } else {
321                             VECTOR_SORT(everyUniqueFunction, addrfunccmp);
322                          }
323                          
324                          pos = ( gapEnd < pdf->getEndOffset() ?
325                                  gapEnd : pdf->getEndOffset() );
326                       } else {
327                          pos++;
328                       }
329                    } else
330                       pos++;
331              }// end while loop
332        }
333        // set gapStart for next iteration & increment funcIdx
334        if (funcIdx < (int)everyUniqueFunction.size()) {
335           gapStart = everyUniqueFunction[funcIdx]->getEndOffset();
336        }
337        funcIdx++;
338     } while (funcIdx < (int)everyUniqueFunction.size());
339   } // end gap parsing
340 #endif
341 #endif // if 0
342
343     // Bind all intra-module call points
344     for (unsigned b_iter = 0; b_iter < everyUniqueFunction.size(); b_iter++) {
345        everyUniqueFunction[b_iter]->checkCallPoints();
346     }
347     
348 #if defined(TIMED_PARSE)
349   struct timeval endtime;
350   gettimeofday(&endtime, NULL);
351   unsigned long lstarttime = starttime.tv_sec * 1000 * 1000 + starttime.tv_usec;
352   unsigned long lendtime = endtime.tv_sec * 1000 * 1000 + endtime.tv_usec;
353   unsigned long difftime = lendtime - lstarttime;
354   double durmsecs = difftime / 1000.0;
355   cout << __FILE__ << ":" << __LINE__ << ": analyzeImage of " << name_ << " took " << durmsecs << " msecs" << endl;
356 #endif 
357
358   
359   parseState_ = analyzed;
360   stats_parse.stopTimer(PARSE_ANALYZE_TIMER);
361   return true;
362 }
363
364 void image::recordFunction(image_func *f)
365 {
366     // update symtab's notion of "size"
367     // TODO FIXME: should update _all_ symbol sizes....
368     f->getSymtabFunction()->getFirstSymbol()->setSize(f->get_size());
369
370     enterFunctionInTables(f);
371
372     // update names
373     // XXX this first call to addSymTabName certainly looks redundant...
374     f->addSymTabName(f->symTabName().c_str());
375     f->addPrettyName(f->symTabName().c_str() );
376 }
377
378 /* parse is responsible for initiating parsing of a function. instPoints
379  * and image_basicBlocks are created at this level.
380  *
381  * Returns: success or failure
382  */
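// Roughly, the steps below are (illustrative summary only):
//   1. refuse to re-parse, and bail out on invalid entry addresses or
//      architecture-specific avoid/unparseable cases;
//   2. create the functionEntry instPoint and detect the stack-frame preamble;
//   3. find, split, or create the placeholder entry basic block in
//      image_->basicBlocksByRange (plus a second entry block on alpha);
//   4. mark the function as actively parsing, then either parseSharedBlocks()
//      (entry block already parsed by another function) or buildCFG();
//   5. finalize() and clear the actively-parsing record.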
383 bool image_func::parse()
384 {
385     pdvector< image_basicBlock * > entryBlocks_;
386
387     if(parsed_)
388     {
389         parsing_printf("[%s:%u] multiple call of parse() for %s\n",
390                 FILE__,__LINE__,symTabName().c_str());
391         return false;
392     }
393     parsed_ = true;
394
395     parsing_printf("[%s:%u] parsing %s at 0x%lx\n", FILE__,__LINE__,
396                 symTabName().c_str(), getOffset());
397
398     if(!img()->isValidAddress(getOffset())) {
399         parsing_printf("[%s:%u] refusing to parse from invalid address 0x%lx\n",
400             FILE__,__LINE__,getOffset());
401         return false;
402     }
403
404     // Some architectures have certain functions or classes of functions
405     // that should appear to be parsed without actually being parsed.
406     if(archAvoidParsing())
407     {
408         retStatus_ = RS_UNKNOWN;
409         return true;
410     }
411
412     // For some architectures, some functions are recognizably unparseable 
413     // from the start.
414     if(archIsUnparseable())
415     {
416         retStatus_ = RS_UNKNOWN;
417         parsing_printf("[%s:%u] archIsUnparseable denied parse at 0x%lx\n",
418             FILE__,__LINE__,getOffset());
419         return false;
420     }
421
422     // Optimistic assumptions
423     instLevel_ = NORMAL;
424     canBeRelocated_ = true;
425
426     noStackFrame = true;
427     image_instPoint *p;
428
429     Address funcBegin = getOffset();
430     Address funcEntryAddr = funcBegin;
431
432     InstrucIter ah(funcBegin, this);
433
434     // Test for various reasons we might just give up and die
435     if( !archCheckEntry( ah, this ) )
436     {
437         // Check for redirection to another function as first insn.
438         // XXX This is a heuristic that we should revisit; is it really
439         //     a common idiom for redirection to another function? Why
440         //     do we need to treat it that way? -- nate & kevin
441         if( ah.isAJumpInstruction() )
442         {
443             Address target = ah.getBranchTargetAddress();
444
445             parsing_printf("[%s:%u] direct jump to 0x%lx at entry point\n",
446                 FILE__,__LINE__,target);
447
448             if(img()->isCode(target)) {
449                 // Recursively parse this function
450                 bindCallTarget(target,NULL);
451             } else {
452                 // Create instrumentation point for on-demand parsing
453                 p = new image_instPoint(*ah,
454                                         ah.getInstruction(),
455                                         this,
456                                         target,
457                                         false,
458                                         false,
459                                         otherPoint); 
460                 pdmod()->addUnresolvedControlFlow(p);
461             }
462         }
463         endOffset_ = ah.peekNext();
464         instLevel_ = UNINSTRUMENTABLE; 
465
466         retStatus_ = RS_UNKNOWN;
467         parsing_printf("[%s:%u] archCheckEntry denied parse at 0x%lx\n",
468             FILE__,__LINE__,getOffset());
469         return false;
470     }
471
472     // Some architectures (alpha) have an entry point that is
473     // not the function start; knowing where it is enables
474     // correct instrumentation.
475     archGetFuncEntryAddr(funcEntryAddr);
476     ah.setCurrentAddress(funcEntryAddr);
477
478     // Define entry point
479     p = new image_instPoint(funcEntryAddr,
480                             ah.getInstruction(),
481                             this,
482                             functionEntry);
483     funcEntries_.push_back(p);
484
485     int frameSize;
486     InstrucIter tmp(ah);
487     if(tmp.isStackFramePreamble(frameSize))
488     {
489         archSetFrameSize(frameSize);
490         noStackFrame = false;
491     }
492     savesFP_ = ah.isFramePush();    // only ever true on x86
493
494     // Architecture-specific "do not relocate" list
495     if(archNoRelocate())
496     {
497         canBeRelocated_ = false;
498     }
499
500     // Create or find function entry block placeholder
501     image_basicBlock *ph_entryBlock;
502     codeRange *tmpRange;
503     bool preParsed = false;
504     if(image_->basicBlocksByRange.find(funcBegin, tmpRange))
505     {
506         ph_entryBlock = dynamic_cast<image_basicBlock *>(tmpRange);
507         if(ph_entryBlock->firstInsnOffset() == funcBegin)
508         {
509             // perfect match; this is either a 'stub' function
510             // or has already been parsed
511             if(!ph_entryBlock->isStub_)
512             {
513                 // non-stub existing blocks are handled by
514                 // parseSharedBlocks
515                 preParsed = true;
516             }
517         }
518         else
519         {
520             // existing block was *split* by function entry
521             ph_entryBlock = ph_entryBlock->split(funcBegin,this);
522
523             image_->basicBlocksByRange.insert(ph_entryBlock);
524
525             preParsed = true;
526         }
527     }
528     else
529     {
530         ph_entryBlock = new image_basicBlock(this, funcBegin);
531         ph_entryBlock->isStub_ = true;
532         image_->basicBlocksByRange.insert(ph_entryBlock);
533     }
534
535     // It is always safe to set these
536     ph_entryBlock->firstInsnOffset_ = funcBegin;
537     ph_entryBlock->isEntryBlock_ = true;
538
539     // Record entry block
540     entryBlock_ = ph_entryBlock;
541     entryBlocks_.push_back(ph_entryBlock);
542
543     // The "actual" entry block (alpha)
544     if(funcEntryAddr != funcBegin)
545     {
546         image_basicBlock *tmpBlock;
547         if(image_->basicBlocksByRange.find(funcEntryAddr,tmpRange))
548         {
549             tmpBlock = dynamic_cast<image_basicBlock *>(tmpRange);
550             if(tmpBlock->firstInsnOffset() == funcEntryAddr)
551             {
552                 tmpBlock->isEntryBlock_ = true;
553             }
554             else
555             {
556                 assert(tmpBlock == ph_entryBlock);
557                 tmpBlock = ph_entryBlock->split(funcEntryAddr,this);
558                 tmpBlock->isEntryBlock_ = true;
559             }
560         }
561         else
562         {
563             tmpBlock = new image_basicBlock(this, funcEntryAddr);
564             tmpBlock->isStub_ = true;
565             image_->basicBlocksByRange.insert(tmpBlock);
566             tmpBlock->isEntryBlock_ = true;
567         }
568         entryBlocks_.push_back(tmpBlock);
569
570         // special alpha-case "multiple" entry (really multiple lookup)
571         image_->funcsByEntryAddr[funcEntryAddr] = this;
572     } 
573     // End alpha-specific weirdness
574
575     // Note that the current function is in-progess to avoid
576     // spurious duplication for loops in the call graph
577     img()->activelyParsing[getOffset()] = this;
578
579     if(preParsed)
580     {
581         parseSharedBlocks(ph_entryBlock);
582     }
583     else
584     {
585         buildCFG(entryBlocks_, funcBegin);
586     }
587
588     finalize();
589
590     // done parsing
591     img()->activelyParsing.undef(getOffset());
592
593     return true;
594 }
595
596 /* buildCFG does the bulk of the work in parsing a function. It is
597  * responsible for following the control-flow through a function, building
598  * up the set of basic blocks that compose it, and connecting these blocks
599  * via edges.
600  *
601  * handles:
602  *  - noncontiguous blocks
603  *  - blocks shared with other functions
604  *  - previously parsed blocks
605  *
606  * Architecture Independence:
607  *  All architecture-specific code is abstracted away in the image-arch
608  *  modules.
609  */
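// The main loop below is a worklist algorithm; a compressed sketch follows
// (illustrative only -- x86 overlap handling and error paths are omitted):
//
//   seed `worklist' and `leaders' with the entry block address(es);
//   for each address A on the worklist:
//       scan instructions forward from A;
//       on a control-transfer instruction, close the current block
//       (lastInsnOffset_ / blockEndOffset_) and then:
//         cond. branch:   addBasicBlock(target) and addBasicBlock(fallthrough);
//         jump/indirect:  addBasicBlock() for each resolvable target;
//         call:           bindCallTarget(target); add a fallthrough edge unless
//                         the callee is known to be non-returning;
//         return:         mark the exit block and record a functionExit point;
//         abort/invalid:  simply close the block;
//   addBasicBlock() splits existing blocks as needed and pushes new leaders
//   onto the worklist.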
610 bool image_func::buildCFG(
611         pdvector<image_basicBlock *>& funcEntries,
612         Address funcBegin)
613 {
614     // Parsing worklist
615     pdvector< Address > worklist;
616
617     // Basic block lookup
618     BPatch_Set< Address > leaders;
619     BPatch_Set< Address > allInstAddrs;
620     dictionary_hash< Address, image_basicBlock* > leadersToBlock( addrHash );
621
622     // Prevent overrunning an existing basic block in linear scan
623     Address nextExistingBlockAddr;
624     image_basicBlock *nextExistingBlock = NULL;
625
626     // Instructions and InstPoints
627     pdvector< instruction > allInstructions;
628     unsigned numInsns = 0;
629     image_instPoint *p;
630     int insnSize;
631
632     // misc
633     codeRange *tmpRange;
634
635     // ** Used to special-case abort and exit calls
636     dictionary_hash<Address, std::string> *pltFuncs = img()->getPltFuncs();
637     
638     // ** end abort/exit setup
639
640     Address funcEnd = funcBegin;
641     Address currAddr = funcBegin;
642
643     // The reverse ordering here is to make things easier on
644     // alpha. Indeed, the only reason funcEntries would ever
645     // have size > 1 is if we're on alpha.
646     for(int j=funcEntries.size()-1; j >= 0; j--)
647     {
648         Address a = funcEntries[j]->firstInsnOffset();
649         leaders += a;
650         leadersToBlock[a] = funcEntries[j];
651         worklist.push_back(a);
652     }
653
654     for(unsigned i=0; i < worklist.size(); i++)
655     {
656         InstrucIter ah(worklist[i],this);
657
658         image_basicBlock* currBlk = leadersToBlock[worklist[i]];
659
660         parsing_printf("[%s:%d] parsing block at 0x%lx, "
661                        "first insn offset 0x%lx\n",
662                        FILE__,__LINE__,
663                        worklist[i], 
664                        currBlk->firstInsnOffset());
665
666         // debuggin' 
667         assert(currBlk->firstInsnOffset() == worklist[i]);
668
669         // If this function has already parsed the block, skip
670         if(containsBlock(currBlk))
671             continue;
672
673         if(currBlk->isStub_)
674         {
675             currBlk->isStub_ = false;
676             addToBlocklist(currBlk);
677
678             parsing_printf("- adding block %d (0x%lx) to blocklist\n",
679                            currBlk->id(), currBlk->firstInsnOffset());
680         }
681         else
682         {
683             // non-stub block must have previously been parsed
684             parseSharedBlocks(currBlk, leaders, leadersToBlock, funcEnd);
685             continue;
686         }
687
688         // Remember where the next block is, so we don't blindly run over
689         // the top of it when scanning through instructions.
690         nextExistingBlockAddr = ULONG_MAX;
691         if(image_->basicBlocksByRange.successor(ah.peekNext(),tmpRange))
692         {
693             nextExistingBlock = dynamic_cast<image_basicBlock*>(tmpRange);
694             if(nextExistingBlock->firstInsnOffset_ > worklist[i])
695             {
696                 nextExistingBlockAddr = nextExistingBlock->firstInsnOffset_;
697             }
698         }
699
700         while(true) // instructions in block
701         {
702             currAddr = *ah;
703             insnSize = ah.getInstruction().size();
704
705             // The following three cases ensure that we properly handle
706             // situations where our parsing comes across previously
707             // parsed code:
708             if(currAddr == nextExistingBlockAddr)
709             {
710                 assert(nextExistingBlock->firstInsnOffset_ == currAddr);
711
712                 parsing_printf("rolling over existing block at 0x%lx\n",currAddr);
713                 // end block with previous addr
714                 // add an edge to this newly found block
715                 // push it on the worklist vector            
716                 Address prevAddr = ah.peekPrev();
717                 currBlk->lastInsnOffset_ = prevAddr;
718                 currBlk->blockEndOffset_ = currAddr;
719
720                 addEdge(currBlk,nextExistingBlock,ET_FALLTHROUGH);
721
722                 if(!containsBlock(nextExistingBlock))
723                 {
724                     leaders += currAddr;
725                     leadersToBlock[currAddr] = nextExistingBlock;
726                     worklist.push_back(currAddr);
727                     parsing_printf("- adding address 0x%lx to worklist\n",
728                                     currAddr);
729
730                     // add this function to block ownership list if we're
731                     // rolling over an unparsed stub of another function
732                     // (i.e., the next block is the unparsed entry block
733                     // of some function)
734                     if(nextExistingBlock->isStub_ && 
735                        !nextExistingBlock->containedIn(this))
736                     {
737                         nextExistingBlock->addFunc(this);
738                     }
739                 }
740                 break;
741             }
742 #if defined(arch_x86) || defined(arch_x86_64)
743             // These portions correspond only to situations that arise
744             // because of overlapping but offset instruction streams --
745             // the kind that only happen on x86.
746
747             else if(allInstAddrs.contains( currAddr ))
748             {
749                 // This address has been seen but is not the start of
750                 // a basic block. This has the following interpretation:
751                 // The current instruction stream has become aligned with
752                 // an existing (previously parsed) instruction stream.
753                 // This should only be possible on x86, and in normal code
754                 // it should only be seen when dealing with cases like
755                 // instruction prefixes (where the bytes of two different
756                 // versions of an instruction effectively overlap).
757
758                 // XXX Because our codeRangeTree has no support for
759                 // overlapping ranges, we are only capable of supporting the
760                 // case where the current instruction stream starts at a
761                 // lower value in the address space than the stream it
762                 // overlaps with. This is sufficient to handle the
763                 // prefixed instruction case (because of the order we
764                 // place blocks on the work list), but any other case
765                 // will make the function uninstrumentable.
766
767                 if(currAddr > currBlk->firstInsnOffset_) {
768                     currBlk->lastInsnOffset_ = ah.peekPrev();
769                     currBlk->blockEndOffset_ = currAddr;
770
771                     // The newly created basic block will split
772                     // the existing basic block that encompasses this
773                     // address.
774                     addBasicBlock(currAddr,
775                                   currBlk,
776                                   leaders,
777                                   leadersToBlock,
778                                   ET_FALLTHROUGH,
779                                   worklist);
780                 } else {
781                     parsing_printf(" ... uninstrumentable due to instruction stream overlap\n");
782                     currBlk->lastInsnOffset_ = currAddr;
783                     currBlk->blockEndOffset_ = ah.peekNext();
784                     instLevel_ = UNINSTRUMENTABLE;
785                 }
786                 break;
787             }
788             else if(currAddr > nextExistingBlockAddr)
789             {
790                 // Overlapping instructions on x86 can mean that we
791                 // can step over the start of an existing block because
792                 // our instruction stream does not exactly coincide with its
793                 // instruction stream. In that case, find the next existing
794                 // block address and reprocess this instruction.
795
796                 nextExistingBlockAddr = ULONG_MAX;
797                 if(image_->basicBlocksByRange.successor(currAddr,tmpRange))
798                 {
799                     nextExistingBlock = 
800                             dynamic_cast<image_basicBlock*>(tmpRange);
801
802                     nextExistingBlockAddr = nextExistingBlock->firstInsnOffset_;
803                 }
804                 continue; // reprocess current instruction
805             }
806 #endif
807
808             allInstructions.push_back( ah.getInstruction() );
809             allInstAddrs += currAddr;
810
811             // only ever true on x86
812             if(ah.isFrameSetup() && savesFP_)
813             {
814                 noStackFrame = false;
815             }
816
817             // Special architecture-specific instruction processing
818             archInstructionProc( ah );
819
820             if( ah.isACondBranchInstruction() )
821             {
822                 currBlk->lastInsnOffset_ = currAddr;
823                 currBlk->blockEndOffset_ = ah.peekNext();
824
825                 if( currAddr >= funcEnd )
826                     funcEnd = ah.peekNext();
827                     //funcEnd = currAddr + insnSize;
828
829                 Address target = ah.getBranchTargetAddress();
830
831                 if( !img()->isValidAddress( target ) ) {
832                     p = new image_instPoint(currAddr,
833                                             ah.getInstruction(),
834                                             this,
835                                             target,
836                                             false,
837                                             false,
838                                             otherPoint);
839                     pdmod()->addUnresolvedControlFlow(p);
840
841                     retStatus_ = RS_UNKNOWN;
842                 }
843
844                 if(ah.isDelaySlot())
845                 {
846                     // Skip delay slot (sparc)
847                     ah++;
848                 } 
849
850                 // Process target first, to prevent overlap between
851                 // the fallthrough block and the target block.
852                 if( target < funcBegin )
853                 {
854                     parsing_printf("Parse of mod:func[%s:%s] Tying up conditional "
855                                    "jump target to an address preceding the funcEntry "
856                                    "[0x%lx]: jump[0x%lx]=>[0x%lx]\n", 
857                                    img()->name().c_str(),
858                                    prettyName().c_str(),funcBegin, currAddr, target);
859                 }
860                 if ( img()->isValidAddress( target ) ) {
861                     addBasicBlock(target,
862                                   currBlk,
863                                   leaders,
864                                   leadersToBlock,
865                                   ET_COND_TAKEN,
866                                   worklist);
867                 }
868
869                 Address t2 = ah.peekNext(); 
870                
871                 // If this branch instruction split the current block,
872                 // the fallthrough should be added to the newly created
873                 // block instead of this current block. 
874                 image_basicBlock * real_src;
875                 if(target > currBlk->firstInsnOffset_ && target <= currAddr) {
876                     real_src = leadersToBlock[target];
877                     assert(real_src);
878                 } else {
879                     real_src = currBlk;
880                 }
881
882                 addBasicBlock(t2,
883                               real_src,
884                               leaders,
885                               leadersToBlock,
886                               ET_COND_NOT_TAKEN,
887                               worklist);
888
889                 break;                              
890             }
891             else if( ah.isAIndirectJumpInstruction() )
892             {
893                 currBlk->lastInsnOffset_ = currAddr;
894                 currBlk->blockEndOffset_ = ah.peekNext();
895                 parsing_printf("... indirect jump at 0x%lx\n", currAddr);
896
897                 if( currAddr >= funcEnd )
898                     funcEnd = ah.peekNext();
899     
900                 numInsns = allInstructions.size() - 2;
901                 if( numInsns == 0 ) {
902                     // this "function" is a single instruction long
903                     // (a jmp to the "real" function)
904                     parsing_printf("... uninstrumentable due to 0 size\n");
905                     instLevel_ = UNINSTRUMENTABLE; 
906                     //endOffset_ = startOffset_;
907                     endOffset_ = getOffset();
908                     p = new image_instPoint(currAddr,
909                                             ah.getInstruction(),
910                                             this,
911                                             0,
912                                             true,
913                                             true,
914                                             functionExit); 
915                     pdmod()->addUnresolvedControlFlow(p);
916
917                     // can't properly determine return status
918                     retStatus_ = RS_UNKNOWN;
919                     return false; 
920                 }                 
921
922                 if( archIsIndirectTailCall(ah) )
923                 {
924                     // looks like a tail call
925                     currBlk->isExitBlock_ = true;
926                     p = new image_instPoint(currAddr,
927                                             ah.getInstruction(),
928                                             this,
929                                             0,
930                                             true,
931                                             true,
932                                             functionExit);
933                     pdmod()->addUnresolvedControlFlow(p); 
934
935                     // can't properly determine return status
936                     retStatus_ = RS_UNKNOWN;
937                     break;
938                 }
939
940                 BPatch_Set< Address > targets;
941                 BPatch_Set< Address >::iterator iter;
942                 if( archIsIPRelativeBranch(ah))
943                 {
944                     processJump(ah, currBlk, 
945                                 funcBegin, funcEnd, allInstructions, leaders, worklist,
946                                 leadersToBlock, pltFuncs);
947                     break;
948                 }
949
950                
951                 // get the list of targets
952                 if(!archGetMultipleJumpTargets(targets,currBlk,ah,
953                                                allInstructions))
954                 {
955                    instLevel_ = HAS_BR_INDIR;
956                    canBeRelocated_ = false;
957                    p = new image_instPoint(currAddr,
958                                            ah.getInstruction(),
959                                            this,
960                                            0,
961                                            true,
962                                            true,
963                                            otherPoint);
964                    pdmod()->addUnresolvedControlFlow(p);
965
966                    retStatus_ = RS_UNKNOWN;
967                 }
968                 
969                 iter = targets.begin();
970                 bool foundBadTarget = false;
971                 for(iter = targets.begin(); iter != targets.end(); iter++)
972                 {
973                     if( !img()->isValidAddress( *iter ) ) {
974                         if (!foundBadTarget) {
975                             p = new image_instPoint(currAddr,
976                                                     ah.getInstruction(),
977                                                     this,
978                                                     0,
979                                                     true, 
980                                                     false,
981                                                     otherPoint);
982                             pdmod()->addUnresolvedControlFlow(p);
983                             foundBadTarget = true;
984                         }
985
986                         retStatus_ = RS_UNKNOWN;
987                         continue;
988                     }
989
990                     if(*iter < funcBegin) {
991                         // FIXME see cond branch note, above
992                         currBlk->isExitBlock_ = true;
993                         // FIXME if this stays an exit block, it needs an exit
994                         // point
995                     }
996                     else { 
997                         addBasicBlock(*iter,
998                                       currBlk,
999                                       leaders,
1000                                       leadersToBlock,
1001                                       ET_INDIR,
1002                                       worklist);
1003                     }
1004                 }
1005
1006                 break;
1007             }
1008             else if( ah.isAJumpInstruction() )
1009             {
1010               processJump(ah, currBlk, 
1011                           funcBegin, funcEnd, allInstructions, leaders, worklist,
1012                           leadersToBlock, pltFuncs);
1013               break;
1014             }
1015             else if( ah.isAReturnInstruction() )
1016             {
1017                 // delay slots?
1018                 parsing_printf("... 0x%lx (%lu) is a return\n", currAddr, 
1019                                 currAddr - getOffset());                       
1020
1021                 currBlk->lastInsnOffset_ = currAddr;
1022                 currBlk->blockEndOffset_ = ah.peekNext();
1023                 currBlk->isExitBlock_ = true;
1024                     
1025                 if( currAddr >= funcEnd )
1026                     funcEnd = ah.peekNext();
1027                 
1028                 parsing_printf("... making new exit point at 0x%lx\n", currAddr);                
1029                 p = new image_instPoint( currAddr, 
1030                                          ah.getInstruction(),
1031                                          this,
1032                                          functionExit);
1033                 funcReturns.push_back( p );
1034                 currBlk->containsRet_ = true;
1035                 retStatus_ = RS_RETURN;
1036
1037                 if (ah.getInstruction().isCleaningRet()) {
1038                     cleansOwnStack_ = true;
1039                 }
1040
1041                 break;
1042             }
1043             else if(ah.isACondReturnInstruction() )
1044             {
1045                 parsing_printf("cond ret branch at 0x%lx\n", currAddr);
1046                 currBlk->lastInsnOffset_ = currAddr;
1047                 currBlk->blockEndOffset_ = ah.peekNext();
1048                 currBlk->isExitBlock_ = true;
1049                   
1050                 if( currAddr >= funcEnd )
1051                     funcEnd = ah.peekNext();
1052                   
1053                 // And make an inst point
1054                 p = new image_instPoint(currAddr, 
1055                                         ah.getInstruction(),
1056                                         this,
1057                                         functionExit);
1058                 parsing_printf("Function exit at 0x%lx\n", *ah);
1059                 funcReturns.push_back(p);
1060                 currBlk->containsRet_ = true;
1061                 retStatus_ = RS_RETURN;
1062                 
1063                 Address t2 = ah.peekNext();
1064                 addBasicBlock(t2,
1065                               currBlk,
1066                               leaders,
1067                               leadersToBlock,
1068                               ET_FALLTHROUGH,
1069                               worklist);
1070                 break;
1071             }
1072             else if( ah.isACallInstruction() ||
1073                      ah.isADynamicCallInstruction() )
1074             {
1075                 currBlk->lastInsnOffset_ = currAddr;
1076                 currBlk->blockEndOffset_ = ah.peekNext();
1077
1078                 if( currAddr >= funcEnd )
1079                     funcEnd = ah.peekNext();
1080
1081                 //validTarget is set to false if the call target is not a 
1082                 //valid address in the application's process space 
1083                 bool validTarget = true;
1084                 //simulateJump is set to true if the call should be treated
1085                 //as an unconditional branch for the purposes of parsing
1086                 //(a special case)
1087                 bool simulateJump = false;
1088
1089                 image_func *targetFunc = NULL;
1090                 
1091                 bool isAbsolute = false;
1092                 Address target = ah.getBranchTargetAddress(&isAbsolute);
1093
1094                 if(archIsRealCall(ah, validTarget, simulateJump))
1095                 {
1096                     if(ah.isADynamicCallInstruction()) {
1097                         p = new image_instPoint( currAddr,
1098                                                  ah.getInstruction(),
1099                                                  this,
1100                                                  0,
1101                                                  true,
1102                                                  true,
1103                                                  callSite);
1104                         pdmod()->addUnresolvedControlFlow(p);
1105
1106                         parsing_printf("[%s:%u] dynamic call 0x%lx -> ?\n",
1107                             FILE__,__LINE__,currAddr);
1108                     }
1109                     else {
1110                         p = new image_instPoint( currAddr,
1111                                                ah.getInstruction(),
1112                                                this,
1113                                                target,
1114                                                false,
1115                                                isAbsolute);
1116                       
1117                         parsing_printf("[%s:%u] binding call 0x%lx -> 0x%lx\n",
1118                             FILE__,__LINE__,currAddr,target);
1119
1120                         // bindCallTarget will look up the target function;
1121                         // if it does not exist, parsing will be initiated
1122                         targetFunc = bindCallTarget(target,currBlk);
1123                     }
1124                     calls.push_back( p );
1125                     currBlk->containsCall_ = true;
1126                 } // real call
1127                 else {
1128                     if(validTarget == false) {
1129                       parsing_printf("[%s:%u] call at 0x%lx targets invalid "
1130                                      "address 0x%lx\n",
1131                             FILE__,__LINE__,currAddr,target);
1132
1133                          if (!img()->isCode(target)) {
1134                              p = new image_instPoint(currAddr,
1135                                                      ah.getInstruction(),
1136                                                      this,
1137                                                      target,
1138                                                      false,
1139                                                      isAbsolute);
1140                              pdmod()->addUnresolvedControlFlow(p);
1141                          }
1142
1143                         currBlk->canBeRelocated_ = false;
1144                         canBeRelocated_ = false;
1145                     }
1146                     else if(simulateJump)
1147                     {
1148                         parsing_printf("[%s:%u] call at 0x%lx simulated as "
1149                                        "jump to 0x%lx\n",
1150                             FILE__,__LINE__,currAddr,target);
1151
1152                           addBasicBlock(target,
1153                                         currBlk,
1154                                         leaders,
1155                                         leadersToBlock,
1156                                         ET_DIRECT,
1157                                         worklist);
1158                     }
1159                 }
1160                 
1161                 if (ah.isDelaySlot()) {
1162                     // Delay slots get skipped; effectively pinned to 
1163                     // the prev. insn.
1164                     ah++;
1165                 }
1166                 
1167                 
1168                 if(targetFunc && (targetFunc->symTabName() == "exit" ||
1169                                   targetFunc->symTabName() == "abort" ||
1170                                   targetFunc->symTabName() == "__f90_stop" ||
1171                                   targetFunc->symTabName() == "fancy_abort"))
1172                 { 
1173                     parsing_printf("Call to %s (%lx) detected at 0x%lx\n",
1174                         targetFunc->symTabName().c_str(),
1175                         target, currAddr);
1176                 }
1177                 else if((*pltFuncs).defines(target) && 
1178                         ((*pltFuncs)[target] == "exit" || 
1179                         (*pltFuncs)[target] == "abort" ||
1180                         (*pltFuncs)[target] == "__f90_stop" ||
1181                         (*pltFuncs)[target] == "fancy_abort"))
1182                 {
1183                       parsing_printf("Call to %s (%lx) detected at 0x%lx\n",
1184                           (*pltFuncs)[target].c_str(),
1185                           target, currAddr);
1186                 }
1187                 else if(!simulateJump)
1188                 {
1189                     // we don't wire up a fallthrough edge if we're treating
1190                     // the call instruction as an unconditional branch
1191                     
1192                     // link up the fallthrough edge unless we know for
1193                     // certain that the target function does not return,
1194                     // or if the target is an entry in the PLT (and not
1195                     // one of the special-case non-returning entries above)
1196                     if(targetFunc && targetFunc->returnStatus() == RS_NORETURN
1197                        && !(*pltFuncs).defines(target))
1198                     {
1199                         parsing_printf("[%s:%u] not parsing past non-returning "
1200                                        "call at 0x%lx (to %s)\n",
1201                             FILE__,__LINE__,*ah,
1202                             targetFunc->symTabName().c_str());
1203                     }
1204                     else
1205                     {
1206                         Address next = ah.peekNext();
1207                         addBasicBlock(next,
1208                                   currBlk,
1209                                   leaders,
1210                                   leadersToBlock,
1211                                   ET_FUNLINK,
1212                                   worklist);
1213                     }
1214                 }
1215                 break;
1216             }
1217             else if( ah.isALeaveInstruction() )
1218             {
1219                 noStackFrame = false;
1220             }
1221             else if( archIsAbortOrInvalid(ah) )
1222             {
1223                 // some architectures have invalid instructions, and
1224                 // all have special 'abort-causing' instructions
1225
1226                 currBlk->lastInsnOffset_ = currAddr;
1227                 currBlk->blockEndOffset_ = ah.peekNext();
1228
1229                 if( currAddr >= funcEnd )
1230                     funcEnd = ah.peekNext();
1231
1232                 break;
1233             }
1234 #if defined(arch_ia64)
1235             else if( ah.isAnAllocInstruction() )
1236             {
1237                 // IA64 only, sad to say
1238                 if( currAddr == currBlk->firstInsnOffset() )
1239                 {
1240                     allocs.push_back( currAddr ); 
1241                 } 
1242                 else
1243                 {
1244                     currBlk->lastInsnOffset_ = ah.peekPrev();
1245                     currBlk->blockEndOffset_ = currAddr;
1246
1247                     // remove some cumulative information
1248                     allInstAddrs.remove(currAddr);
1249                     allInstructions.pop_back();
1250
1251                     addBasicBlock(currAddr,
1252                                     currBlk,
1253                                     leaders,
1254                                     leadersToBlock,
1255                                     ET_FALLTHROUGH,
1256                                     worklist);
1257                     break;
1258                 }
1259             }
1260 #endif
1261             if (!img()->isValidAddress(ah.peekNext())) {
1262                //The next instruction is not in the .text segment.  We'll 
1263                // abort this basic block as if it were terminating with 
1264                // an illegal instruction.
1265                parsing_printf("Next instruction is invalid, ending basic block\n");
1266                
1267                currBlk->lastInsnOffset_ = currAddr;
1268                currBlk->blockEndOffset_ = ah.peekNext();
1269
1270                if( currAddr >= funcEnd )
1271                   funcEnd = ah.peekNext();
1272                 parsing_printf("... making new exit point at 0x%lx\n", currAddr);                
1273                 p = new image_instPoint( currAddr,
1274                                          ah.getInstruction(),
1275                                          this,
1276                                          functionExit);
1277                 funcReturns.push_back( p );
1278                 break;
1279             }
1280             ah++;
1281         }
1282     }
1283
1284     endOffset_ = funcEnd;
1285
1286     // If the status hasn't been updated to UNKNOWN or RETURN by now,
1287     // set it to RS_NORETURN
1288     if(retStatus_ == RS_UNSET)
1289         retStatus_ = RS_NORETURN;
1290
1291     return true;
1292 }
1293
1294 /* bindCallTarget links the target address of a call to the function
1295  * and entry block to which it refers. If such a function/block pair
1296  * does not exist (or if the target is the middle of another function),
1297  * a new function will be created and parsed.
1298  *
1299  * Returns: pointer to the [parsed] function object that is the target of the call
1300  *
1301  * Side effects:
1302  *  - May split existing blocks
1303  *  - May mark existing blocks as shared
1304  *  - May recursively parse one or more functions
1305  *
1306  * This function can be called with NULL currBlk as a shorthand for
1307  * finding or recursively parsing the target
1308  */
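// For example, elsewhere in this file: buildCFG() calls
// bindCallTarget(target, currBlk) to wire up the ET_CALL edge from a call
// site, while parse() calls bindCallTarget(target, NULL) when the entry point
// is a direct jump to another function and only the lookup (and possible
// recursive parse) of the target is wanted.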
1309 image_func * image_func::bindCallTarget(
1310         Address target,
1311         image_basicBlock* currBlk)
1312 {
1313     codeRange *tmpRange;
1314     image_basicBlock *ph_callTarget = NULL;
1315     image_basicBlock *newBlk;
1316     image_func *targetFunc;
1317     bool created = false;
1318
1319     // targetfunc may be parsed or unparsed, and it may not yet have
1320     // an entry basic block associated with it. `created' indicates
1321     // whether a new image_func was created
1322     targetFunc = FindOrCreateFunc(target,FS_RT,created);
1323
1324     if(image_->basicBlocksByRange.find(target,tmpRange))
1325     {
1326         ph_callTarget =       
1327             dynamic_cast<image_basicBlock*>(tmpRange);
1328     }
1329         
1330     if(ph_callTarget && ph_callTarget->firstInsnOffset_ == target )
1331     {   
1332         if(currBlk)
1333             addEdge(currBlk,ph_callTarget,ET_CALL);
1334     }           
1335     else {
1336         // going to need a new basic block
1337         newBlk = new image_basicBlock(targetFunc,target);
1338
1339         if(ph_callTarget) {
1340             // The target lies within an existing block, which
1341             // must therefore be split
1342             ph_callTarget->split(newBlk);
1343         }
1344         else
1345             newBlk->isStub_ = true;
1346     
1347         image_->basicBlocksByRange.insert(newBlk);
1348
1349         if(currBlk)
1350             addEdge(currBlk,newBlk,ET_CALL);
1351     }
1352
1353     // Now parse the function, if necessary                
1354     if(!targetFunc->parsed()) {
1355         assert( targetFunc->img()->isCode(targetFunc->getOffset()) );
1356
1357         parsing_printf("[%s:%u] recursive parsing of call target at 0x%lx\n",
1358                 FILE__,__LINE__,targetFunc->getOffset());
1359
1360         if(targetFunc->parse()) {
1361             targetFunc->img()->recordFunction(targetFunc);
1362
1363             parsing_printf("[%s:%u] recursive parsing of 0x%lx complete\n",
1364                 FILE__,__LINE__,targetFunc->getOffset());
1365         } else {
1366             parsing_printf("[%s:%u] recursive parsing of 0x%lx failed\n",
1367                 FILE__,__LINE__,targetFunc->getOffset());
1368
1369             /* XXX Because of the enormous number of references to functions
1370                    created during parsing, it is not currently safe to
1371                    delete image_func objects once they have been partially
1372                    parsed. This needs a design-level fix.
1373             // only want to delete this function if FindOrCreateFunc
1374             // actually created one
1375             if(created) 
1376                 delete targetFunc;
1377             */
1378             targetFunc = NULL;
1379         }
1380     }
1381                 
1382     return targetFunc;
1383 }
1384
1385 /* FindOrCreateFunc
1386  *
1387  * Look up or create a new function for this image at a given address,
1388  * depending on whether one exists in the symbol table (or was previously
1389  * discovered).
1390  *
1391  * Returns: pointer to an existing or new function
1392  */
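/* Illustrative sketch only: the lookup-or-create contract, with a
 * hypothetical address. The caller owns the `created' flag and must
 * initialize it; FindOrCreateFunc only ever sets it to true.
 *
 *   bool created = false;
 *   image_func *f = FindOrCreateFunc(0x401000, FS_RT, created);
 *   // created == true only if no symbol-table entry or previously
 *   // discovered function existed at that entry address; such
 *   // functions get a placeholder name of the form "targ401000".
 */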
1393 image_func * image_func::FindOrCreateFunc(Address target, 
1394                                           FuncSource src,
1395                                           bool & created)
1396 {
1397     image_func *targetFunc;
1398
1399     if(image_->funcsByEntryAddr.defines(target))
1400     {   
1401         targetFunc = image_->funcsByEntryAddr[target];
1402     }
1403     else
1404     {   
1405         if(img()->activelyParsing.defines(target))
1406             targetFunc = img()->activelyParsing[target];
1407         else {
1408             char name[32];
1409 #if defined (os_windows)
1410             _snprintf(name,32,"targ%lx",target);
1411 #else
1412             snprintf(name,32,"targ%lx",target);
1413 #endif
1414             targetFunc = new image_func(name,target,UINT_MAX,mod_,image_,src);
1415             created = true;
1416         }
1417     }
1418     
1419     return targetFunc;
1420 }
1421
1422 /* parseSharedBlocks handles the case where a reachable path of blocks
1423  * in a function has already been parsed by another function. All blocks
1424  * reachable from firstBlock will be added to the current function, and
1425  * will be updated to reflect their shared status.
1426  *
1427  * Call edges are, as in normal parsing, not followed.
1428  *
1429  * Side effects: (many)
1430  *  - Adds to leaders
1431  *  - Adds to leadersToBlock
1432  *  - May update other functions' "shared" status
1433  *
1434  */
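/* Illustrative sketch only: a hypothetical call site. When the normal
 * parse loop discovers that a block it reached is already owned by some
 * other function, it hands the whole reachable region to this routine:
 *
 *   // preParsedBlk stands in for a block found to be already parsed
 *   parseSharedBlocks(preParsedBlk, leaders, leadersToBlock, funcEnd);
 *
 * On return, every non-stub block reachable from preParsedBlk (call
 * edges excluded) is on this function's block list; stub blocks are
 * queued on img()->reparse_shared for a later pass.
 */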
1435 void image_func::parseSharedBlocks(image_basicBlock * firstBlock,
1436                 BPatch_Set< Address > &leaders,
1437                 dictionary_hash< Address, image_basicBlock * > &leadersToBlock,
1438                 Address & funcEnd)
1439 {
1440     pdvector< image_basicBlock * > WL;
1441     pdvector< image_edge * > targets;
1442
1443     BPatch_Set< image_basicBlock * > visited;
1444
1445     image_basicBlock * curBlk;
1446     image_basicBlock * child;
1447
1448     image_instPoint * tmpInstPt;
1449     image_instPoint * cpyInstPt;
1450
1451     WL.push_back(firstBlock);
1452     visited.insert(firstBlock);
1453
1454     parsing_printf("[%s:%u] Parsing shared code at 0x%lx, startoffset: "
1455                    "0x%lx endoffset: 0x%lx\n",
1456         FILE__,__LINE__,firstBlock->firstInsnOffset_, getOffset(), endOffset_);
1457
1458     // remember that we have shared blocks
1459     containsSharedBlocks_ = true;
1460
1461     // assume that return status is UNKNOWN because we do not do a detailed
1462     // parse of the code. This assumption leads to the greatest code 
1463     // coverage
1464     if(retStatus_ == RS_UNSET)
1465         retStatus_ = RS_UNKNOWN;
1466
1467     // There are several cases that can lead to a pre-parsed block
1468     // having the current function (this) on its funcs_ list:
1469     //
1470     // 1. The shared block is the result of splitting a block in another
1471     // function (current function added to funcs_ in block creation)
1472     //
1473     // 2. The shared block was created by a call operation to an unparsed
1474     //    function *and then subsequently parsed prior to parsing the
1475     //    function for which it is the entry block* (whew). In that case
1476     //    the entire "called" function is pre-parsed, but it is already
1477     //    on the funcs_ list of the *first* block (bound up in
1478     //    bindCallTarget).
1479     //
1480     // In both cases, 'this' will always be the first function on the funcs_
1481     // list, if it is there at all, so we always check whether funcs_[0] == 
1482     // this prior to adding this to the funcs_ list. 
1483     //
1484
1485     while(WL.size() > 0)
1486     {
1487         curBlk = WL.back();
1488         WL.pop_back(); 
1489   
1490         if(containsBlock(curBlk))
1491             continue;
1492
1493         // If the block isn't already shared, it belongs to exactly one
1494         // function, which doesn't yet know that it contains shared code.
1495         if(!curBlk->isShared_)
1496         {
1497             image_func * first = *curBlk->funcs_.begin();
1498             first->containsSharedBlocks_ = true;
1499             first->needsRelocation_ = true;
1500         }
1501
1502         // If this block is a stub, we'll revisit it when parsing is done
1503         if(curBlk->isStub_) {
1504             img()->reparse_shared.push_back(
1505                 pair<image_basicBlock *, image_func *>(curBlk,this));
1506             parsing_printf("XXXX postponing stub block %d (0x%lx) for later\n",
1507                 curBlk->id(),curBlk->firstInsnOffset_);
1508             continue;
1509         }
1510
1511         /* Copy instrumentation points (if any) to this function that
1512            will incorporate the shared code.
1513
1514            XXX
1515            Given parsing errors that are possible given misparsed
1516            control flow (e.g., badly parsed indirect control flow,
1517            non-returning functions, and opaque conditional branches),
1518            it is possible for misaligned blocks to overlap &
1519            include the same instpoint. This is /not safe/ and should
1520            be addressed in a new, overlap-friendly parser, but
1521            in the meanwhile it is imperative that instpoints are not
1522            duplicated in these [rare] cases.
1523
1524            XXX
1525            For the moment we deliberately do not check before copying,
1526            so that such parsing bugs continue to be exercised.
1527         */
1528
1529         // note that these have to occur before this function
1530         // is added to the block's ownership list
1531         if((tmpInstPt = curBlk->getCallInstPoint()) != NULL)
1532         {
1533             parsing_printf("... copying call point at 0x%lx\n", tmpInstPt->offset());                
1534             cpyInstPt = new image_instPoint(tmpInstPt->offset(),
1535                                     tmpInstPt->insn(),
1536                                     this,
1537                                     tmpInstPt->callTarget(),
1538                                     tmpInstPt->isDynamic());
1539             calls.push_back(cpyInstPt);    
1540         }
1541         if((tmpInstPt = curBlk->getRetInstPoint()) != NULL)
1542         {
1543             parsing_printf("... copying exit point at 0x%lx\n", tmpInstPt->offset());                
1544             cpyInstPt = new image_instPoint(tmpInstPt->offset(),
1545                                     tmpInstPt->insn(),
1546                                     this,
1547                                     tmpInstPt->getPointType());
1548             funcReturns.push_back(cpyInstPt);
1549
1550             retStatus_ = RS_RETURN;
1551         }
1552
1553         // As described in the comment above, the only case in which
1554         // 'this' could already be on the block's funcs_ list is when
1555         // it is the first entry.
1556         if(*curBlk->funcs_.begin() != this) {
1557             curBlk->addFunc(this);
1558         }
1559
1560         // add this pre-parsed block to the function's block list
1561         addToBlocklist(curBlk);
1562         parsing_printf("XXXX adding pre-parsed block %d (0x%lx) to blocklist\n",
1563                 curBlk->id(),curBlk->firstInsnOffset_);
1564         // update "function end"
1565         if(funcEnd < curBlk->blockEndOffset_)
1566             funcEnd = curBlk->blockEndOffset_;
1567
1568         targets.clear();
1569         curBlk->getTargets(targets);
1570
1571         for(unsigned int i=0;i<targets.size();i++) 
1572         {
1573             child = targets[i]->getTarget();
1574
1575             if(targets[i]->getType() != ET_CALL && !visited.contains(child))
1576             {
1577                 leaders += child->firstInsnOffset_;
1578                 leadersToBlock[ child->firstInsnOffset_ ] = child;
1579                 WL.push_back(child);
1580                 visited.insert(child);
1581             }
1582         }
1583     }
1584 #if defined (cap_use_pdvector)
1585     WL.zap();
1586     targets.zap();
1587 #else
1588     WL.clear();
1589     targets.clear();
1590 #endif
1591 }
1592
1593 /* A specialized version for parsing shared blocks when we don't care
1594  * to save any information about the blocks that were visited (that is,
1595  * when the entire function is known to be shared and so no other normal
1596  * parsing is taking place).
1597  */
1598 void image_func::parseSharedBlocks(image_basicBlock * firstBlock)
1599 {
1600     BPatch_Set< Address > leaders;
1601     dictionary_hash< Address, image_basicBlock * > leadersToBlock( addrHash );
1602
1603     endOffset_ = getOffset();
1604     
1605     parseSharedBlocks(firstBlock,
1606                       leaders,          /* unused */
1607                       leadersToBlock,   /* unused */
1608                       endOffset_);
1609 }
1610
1611 void image_func::processJump(InstrucIter& ah, 
1612                              image_basicBlock* currBlk,
1613                              Address& funcBegin,
1614                              Address& funcEnd,
1615                              pdvector<instruction>& allInstructions,
1616                              BPatch_Set<Address>& leaders,
1617                              pdvector<Address>& worklist,
1618                              dictionary_hash<Address, image_basicBlock*>& leadersToBlock,
1619                              dictionary_hash<Address, std::string> *pltFuncs
1620                              )
1621 {
1622   Address currAddr = *ah;
1623   image_instPoint* p;
1624   // delay slots?
1625   currBlk->lastInsnOffset_ = currAddr;
1626   currBlk->blockEndOffset_ = ah.peekNext();
1627   
1628   if( currAddr >= funcEnd )
1629     funcEnd = ah.peekNext();
1630   
1631   Address target = ah.getBranchTargetAddress();
1632   
1633   parsing_printf("... 0x%lx is a jump to 0x%lx\n",
1634                  currAddr, target);
1635   
1636   Address catchStart;
1637   
1638   // Only used on x86
1639   if(archProcExceptionBlock(catchStart,ah.peekNext()))
1640   {
1641     addBasicBlock(catchStart,
1642                   currBlk,
1643                   leaders,
1644                   leadersToBlock,
1645                   ET_CATCH,
1646                   worklist);
1647   }
1648   
1649   if( !img()->isValidAddress( target ) ) {
1650     p = new image_instPoint(currAddr,
1651                             ah.getInstruction(),
1652                             this,
1653                             target,
1654                             false,
1655                             false,
1656                             otherPoint); 
1657     pdmod()->addUnresolvedControlFlow(p);
1658
1659     retStatus_ = RS_UNKNOWN;
1660     return;
1661   }
1662
1663   if(archIsATailCall( ah, allInstructions ) ||
1664      (*pltFuncs).defines(target))
1665   {
1666     // XXX this output is for testing only; remove
1667     if((*pltFuncs).defines(target)) {
1668       parsing_printf("[%s:%u] tail call into plt %lx -> %lx\n",
1669                      FILE__,__LINE__,*ah,target);
1670     }
1671           
1672     // Only on x86 & sparc currently
1673     currBlk->isExitBlock_ = true;
1674
1675     p = new image_instPoint(currAddr,
1676                             ah.getInstruction(),
1677                             this,
1678                             target,
1679                             false,
1680                             false,
1681                             functionExit);
1682     funcReturns.push_back( p );
1683     currBlk->containsRet_ = true;
1684
1685     parsing_printf("[%s:%u] tail call 0x%lx -> 0x%lx\n",
1686         FILE__,__LINE__,*ah,target);
1687     image_func *targetFunc = bindCallTarget(target,currBlk);
1688     if(targetFunc) {
1689         // Functions that make tail calls inherit the return
1690         // status of their call targets
1691         if(retStatus_ == RS_UNSET &&
1692             targetFunc->returnStatus() != RS_NORETURN)
1693         {
1694             retStatus_ = targetFunc->returnStatus();
1695         }
1696     } else {
1697         parsing_printf("[%s:%u] unparseable tail call at 0x%lx\n",
1698             FILE__,__LINE__,*ah);
1699         retStatus_ = RS_UNKNOWN;
1700     }
1701   }
1702   else 
1703   {
1704     if( target < funcBegin ) {
1705       parsing_printf("Parse of mod:func[%s:%s] Tying up jump target "
1706                      "to an address before the funcEntry[0x%lx]: "
1707                      "jump[0x%lx]=>[0x%lx]\n", img()->name().c_str(),
1708                      prettyName().c_str(),funcBegin, currAddr, target);
1709     } else {
1710       parsing_printf("... tying up unconditional branch target\n");
1711     }
1712     addBasicBlock(target,
1713                   currBlk,
1714                   leaders,
1715                   leadersToBlock,
1716                   ET_DIRECT,
1717                   worklist);
1718   }
1719 }