2 * Copyright (c) 1996-2004 Barton P. Miller
4 * We provide the Paradyn Parallel Performance Tools (below
5 * described as "Paradyn") on an AS IS basis, and do not warrant its
6 * validity or performance. We reserve the right to update, modify,
7 * or discontinue this software at any time. We shall have no
8 * obligation to supply such updates or modifications or any other
9 * form of support to you.
11 * This license is for research uses. For such uses, there is no
12 * charge. We define "research use" to mean you may freely use it
13 * inside your organization for whatever purposes you see fit. But you
14 * may not re-distribute Paradyn or parts of Paradyn, in any form
15 * source or binary (including derivatives), electronic or otherwise,
16 * to any other organization or entity without our permission.
18 * (for other uses, please contact us at paradyn@cs.wisc.edu)
20 * All warranties, including without limitation, any warranty of
21 * merchantability or fitness for a particular purpose, are hereby
24 * By your use of Paradyn, you understand and agree that we (or any
25 * other person or entity with proprietary rights in Paradyn) are
26 * under no obligation to provide either maintenance services,
27 * update services, notices of latent defects, or correction of
28 * defects for Paradyn.
30 * Even if advised of the possibility of such damages, under no
31 * circumstances shall we (or any other person or entity with
32 * proprietary rights in the software licensed hereunder) be liable
33 * to you or any third party for direct, indirect, or consequential
34 * damages of any character regardless of type of action, including,
35 * without limitation, loss of profits, loss of use, loss of good
36 * will, or computer failure or malfunction. You agree to indemnify
37 * us (and any other person or entity with proprietary rights in the
38 * software licensed hereunder) for any and all liability it may
39 * incur to third parties resulting from your use of Paradyn.
43 * $Id: image-flowGraph.C,v 1.54 2008/10/28 18:42:46 bernat Exp $
51 #include "dyninstAPI/src/symtab.h"
52 #include "dyninstAPI/src/arch.h"
53 #include "dyninstAPI/src/instPoint.h"
54 #include "symtabAPI/h/Symtab.h"
56 #include "dyninstAPI/src/parRegion.h"
59 #include "dyninstAPI/src/util.h"
60 #include "dyninstAPI/src/debug.h"
62 #include "dyninstAPI/src/InstrucIter.h"
64 #include "symtabAPI/h/LineInformation.h"
65 #include "dyninstAPI/h/BPatch_flowGraph.h"
67 #if defined(TIMED_PARSE)
71 using namespace Dyninst;
// NOTE(review): this file appears to be a lossy extraction -- each line is
// prefixed with its original line number and many lines (returns, braces)
// are missing. Code below is left byte-identical; comments only.
//
// Address-based comparators used to order function lists by entry offset
// (used with VECTOR_SORT in analyzeImage's gap-parsing code below).
73 #if defined (cap_use_pdvector)
// pdvector variant: qsort-style three-way comparator on getOffset().
// Return statements are elided in this view -- confirm in the full source.
74 int addrfunccmp( image_func*& pdf1, image_func*& pdf2 )
76 if ( pdf1->getOffset() > pdf2->getOffset() )
78 if ( pdf1->getOffset() < pdf2->getOffset() )
83 //stl needs strict weak ordering (true if arg 1 is strictly less than 2)
84 bool addrfunccmp( image_func* pdf1, image_func* pdf2 )
// NOTE(review): the same '<' test appears twice (orig lines 86 and 90) with
// the intervening lines missing from this view -- verify against the full
// source that this really implements a strict weak ordering.
86 if ( pdf1->getOffset() < pdf2->getOffset() )
90 if ( pdf1->getOffset() < pdf2->getOffset() )
98 //analyzeImage() constructs a flow graph of basic blocks in the image and
99 //assigns them to functions. It follows calls to discover new functions,
100 //as well as using heuristics to search for functions within gaps between
103 //Note: basic blocks may be shared between several functions
// analyzeImage: top-level driver of image analysis. Visible responsibilities:
//  1. parse every symtab-reported candidate function (symtabCandidateFuncs),
//     entering successfully parsed ones into the lookup tables;
//  2. finish functions left incomplete by the `shared code' mechanism
//     (reparse_shared);
//  3. (Solaris/AIX only) match outlined OpenMP regions to their parent
//     functions and parse their directives;
//  4. (disabled, #if 0) scan gaps between known functions for prologues;
//  5. bind intra-module call points via checkCallPoints().
// Timing instrumentation is compiled in under TIMED_PARSE.
// Returns: presumably true on success -- return statements are elided in
// this view; confirm in the full source.
104 bool image::analyzeImage()
106 #if defined(TIMED_PARSE)
107 struct timeval starttime;
108 gettimeofday(&starttime, NULL);
111 stats_parse.startTimer(PARSE_ANALYZE_TIMER);
// Put the x86 decoder into 64-bit mode iff the object uses 8-byte addresses.
113 #if defined(arch_x86_64)
114 ia32_set_mode_64(getObject()->getAddressWidth() == 8);
118 pdmodule *mod = NULL;
// Guard against re-entry: parseState_ transitions symtab -> analyzing -> analyzed.
120 assert(parseState_ < analyzed);
121 // Prevent recursion: with this set we can call findFoo as often as we want
122 parseState_ = analyzing;
// Symbol-table pass must have run first; bail out (and stop the timer) if not.
124 if (parseState_ < symtab) {
125 fprintf(stderr, "Error: attempt to analyze before function lists built\n");
126 stats_parse.stopTimer(PARSE_ANALYZE_TIMER);
130 // Parse the functions reported in the symbol table
131 for(unsigned i=0; i<symtabCandidateFuncs.size(); ++i) {
132 pdf = symtabCandidateFuncs[i];
135 assert(pdf); assert(mod);
137 // Check whether recursive traversal has dealt with a function
138 // before proceeding to parse it
140 parsing_printf("[%s:%u] calling parse() at 0x%lx (ptr %p)\n",
141 FILE__,__LINE__,pdf->getOffset(),pdf);
// On successful parse, record the function in the image-wide tables.
144 enterFunctionInTables(pdf);
146 parsing_printf("[%s:%u] symtab-defined function %s at 0x%lx "
147 "failed to parse\n",FILE__,__LINE__,
148 pdf->symTabName().c_str(),pdf->getOffset());
150 /** FIXME removing symtab-defined functions is currently unsafe,
151 as there is no mechanism to remove the symbols (and thus
152 their up-pointers) through the SymtabAPI. See bug 906.
153 For the time being, leaving these crufty functions around.
155 // symtab functions need to be removed from the list
156 // of known functions by address
157 //funcsByEntryAddr.undef(pdf->getOffset());
163 /* Some functions that built their block lists through the
164 `shared code' mechanism may be incomplete. Finish them. */
165 parsing_printf("[%s:%u] Completing `shared code' parsing: %u entries\n",
166 FILE__,__LINE__,reparse_shared.size());
167 for(unsigned i=0;i<reparse_shared.size();++i) {
168 pair<image_basicBlock *,image_func*> & p = reparse_shared[i];
169 (p.second)->parseSharedBlocks(p.first);
175 For starters, we have all functions with some
176 sort of identifier in the function name ('@' for Power),
177 designating an outlined OpenMP Function. This is contained in the
178 'parallelRegions' variable. We then want to find out the parent function
179 for each of these regions, i.e. the function that sets all the parameters
180 for a given OpenMP function. We need this parent function to gather
181 information about what kind of directive is associated with this outline
182 function and the clauses associated with that function
185 #if defined(os_solaris) || defined(os_aix)
186 image_parRegion *parReg;
187 int currentSectionNum = 0;
189 // Most of the time there will be no parallel regions in an image, so nothing will happen.
190 // If there is a parallel region we examine it individually
191 for (unsigned i = 0; i < parallelRegions.size(); i++)
193 parReg = parallelRegions[i];
198 // Every parallel region has the image_func that contains the
199 // region associated with it
200 image_func * imf = parReg->getAssociatedFunc();
202 // Returns pointers to all potential image_funcs that correspond
203 // to what could be the parent OpenMP function for the region
204 const pdvector<image_func *> *prettyNames =
205 findFuncVectorByPretty(imf->calcParentFunc(imf, parallelRegions));
207 //There may be more than one (or none) functions with that name, we take the first
208 // This function gives us all the information about the parallel region by getting
209 // information for the parallel region parent function
210 if (prettyNames->size() > 0)
211 imf->parseOMP(parReg, (*prettyNames)[0], currentSectionNum);
217 /**************************/
218 /* END OpenMP Parsing Code */
219 /**************************/
// The gap/stripped-binary scan below is compiled out (#if 0): it sorted
// functions by address, walked the gaps between them, and speculatively
// created "gap_<addr>" functions at recognized prologues.
223 #if 0 // stripped binary parsing in its current form is dangerous
224 #if defined(cap_stripped_binaries)
226 mod = getOrCreateModule("DEFAULT_MODULE", linkedFile->imageOffset());
227 // Sort functions to find gaps
228 VECTOR_SORT(everyUniqueFunction, addrfunccmp);
230 Address gapStart = linkedFile->imageOffset();
232 int funcIdx = 0; // index into everyUniqueFunction (which may be empty)
234 /* The everyUniqueFunction vector can be updated in the for
235 loop. If functions are discovered preceeding the one
236 pointed to by the current 'funcIdx' index, we need to
237 advance funcIdx and gapStart to the correct position */
238 /* (It would be better to use a data structure with an iterator
239 that doesn't break on inserts, such as an ordered set) */
240 while(funcIdx < (int) everyUniqueFunction.size() &&
241 everyUniqueFunction[funcIdx]->getEndOffset() < gapStart)
243 /*parsing_printf("[%s:%u] advancing gap index (gapstart: 0x%lx, "
244 "offset: 0x%lx)\n", FILE__,__LINE__,
245 gapStart,everyUniqueFunction[funcIdx]->getOffset());*/
248 if (funcIdx > 0 && funcIdx < (int) everyUniqueFunction.size()
249 && gapStart < everyUniqueFunction[funcIdx]->getEndOffset()) {
250 gapStart = everyUniqueFunction[funcIdx]->getEndOffset();
253 // set gapEnd to start of next function or end of code section,
254 // on 1st iter, set gapEnd to start of 1st function or end of code section
255 if (funcIdx == 0 && everyUniqueFunction.size() > 0) {
256 gapEnd = everyUniqueFunction[0]->getOffset();
258 else if(funcIdx+1 < (int)everyUniqueFunction.size()) {
259 gapEnd = everyUniqueFunction[funcIdx + 1]->getOffset();
261 // if there is no next function, set gapEnd to end of codeSection
262 else if (funcIdx+1 == (int) everyUniqueFunction.size() || funcIdx == 0) {
263 gapEnd = linkedFile->imageOffset() + linkedFile->imageLength();
266 break; // advanced gap past the end of the code section
269 int gap = gapEnd - gapStart;
271 parsing_printf("[%s:%u] scanning for prologues in range 0x%lx-0x%lx\n",
272 FILE__,__LINE__,gapStart, gapEnd);
274 //gap should be big enough to accomodate a function prologue
276 Address pos = gapStart;
277 while( pos < gapEnd && isCode( pos ) )
279 const unsigned char* instPtr;
280 instPtr = (const unsigned char *)getPtrToInstruction( pos );
282 insn.setInstruction( instPtr, pos );
// A prologue at an address not already known as a function entry starts a
// new synthetic "gap_<addr>" function.
284 if( isFunctionPrologue(insn) && !funcsByEntryAddr.defines(pos))
287 snprintf( name, 32, "gap_%lx", pos );
288 pdf = new image_func( name, pos, UINT_MAX, mod, this);
291 pdf->addSymTabName(name); // newly added
292 pdf->addPrettyName( name );
295 // TODO FIXME: should update _all_ symbol sizes....
296 pdf->getSymtabFunction()->getFirstSymbol()->setSize(pdf->get_size());
298 enterFunctionInTables(pdf, false);
300 // FIXME this code has not been updated to the reality
301 // of RT parsing at this point, and will not work
304 // If any calls were discovered, adjust our
305 // position in the function vector accordingly
306 if(callTargets.size() > 0) {
307 while( callTargets.size() > 0 )
309 /* parse static call targets */
310 parseStaticCallTargets( callTargets,
311 new_targets, preParseStubs );
314 VECTOR_APPEND(callTargets,new_targets);
318 VECTOR_SORT(everyUniqueFunction, addrfunccmp);
319 break; // Return to for loop
321 VECTOR_SORT(everyUniqueFunction, addrfunccmp);
324 pos = ( gapEnd < pdf->getEndOffset() ?
325 gapEnd : pdf->getEndOffset() );
333 // set gapStart for next iteration & increment funcIdx
334 if (funcIdx < (int)everyUniqueFunction.size()) {
335 gapStart = everyUniqueFunction[funcIdx]->getEndOffset();
338 } while (funcIdx < (int)everyUniqueFunction.size());
343 // Bind all intra-module call points
344 for (unsigned b_iter = 0; b_iter < everyUniqueFunction.size(); b_iter++) {
345 everyUniqueFunction[b_iter]->checkCallPoints();
348 #if defined(TIMED_PARSE)
349 struct timeval endtime;
350 gettimeofday(&endtime, NULL);
351 unsigned long lstarttime = starttime.tv_sec * 1000 * 1000 + starttime.tv_usec;
352 unsigned long lendtime = endtime.tv_sec * 1000 * 1000 + endtime.tv_usec;
353 unsigned long difftime = lendtime - lstarttime;
354 double dursecs = difftime/(1000 );
355 cout << __FILE__ << ":" << __LINE__ <<": analyzeImage of " << name_ << " took "<<dursecs <<" msecs" << endl;
// Analysis complete; later queries may rely on parseState_ == analyzed.
359 parseState_ = analyzed;
360 stats_parse.stopTimer(PARSE_ANALYZE_TIMER);
// recordFunction: register a newly created image_func with the image.
// Updates the size of the function's first symtab symbol, enters the
// function into the image lookup tables, and (re)adds its symtab/pretty
// names. No return value.
364 void image::recordFunction(image_func *f)
366 // update symtab's notion of "size"
367 // TODO FIXME: should update _all_ symbol sizes....
368 f->getSymtabFunction()->getFirstSymbol()->setSize(f->get_size());
370 enterFunctionInTables(f);
373 // XXX this first call to addSymTabName certainly looks redundant...
374 f->addSymTabName(f->symTabName().c_str());
375 f->addPrettyName(f->symTabName().c_str() );
378 /* parse is responsible for initiating parsing of a function. instPoints
379 * and image_basicBlocks are created at this level.
381 * Returns: success or failure
// parse: initiate parsing of this function. Validates the entry address,
// applies architecture-specific accept/reject checks, records the entry
// instPoint and stack-frame information, creates or finds the entry
// basic-block placeholder (splitting an existing block if the entry lands
// inside one), handles the alpha-style distinct entry address, and finally
// delegates CFG construction to buildCFG().
// Returns: success or failure (per the file's own comment above; the
// return statements themselves are elided in this view).
383 bool image_func::parse()
385 pdvector< image_basicBlock * > entryBlocks_;
// Guard against duplicate parsing of the same function.
389 parsing_printf("[%s:%u] multiple call of parse() for %s\n",
390 symTabName().c_str());
395 parsing_printf("[%s:%u] parsing %s at 0x%lx\n", FILE__,__LINE__,
396 symTabName().c_str(), getOffset());
// Refuse to parse from an address outside the image.
398 if(!img()->isValidAddress(getOffset())) {
399 parsing_printf("[%s:%u] refusing to parse from invalid address 0x%lx\n",
400 FILE__,__LINE__,getOffset());
404 // Some architectures have certain functions or classes of functions
405 // that should appear to be parsed without actually being parsed.
406 if(archAvoidParsing())
408 retStatus_ = RS_UNKNOWN;
412 // For some architectures, some functions are recognizably unparseable
414 if(archIsUnparseable())
416 retStatus_ = RS_UNKNOWN;
417 parsing_printf("[%s:%u] archIsUnparseable denied parse at 0x%lx\n",
418 FILE__,__LINE__,getOffset());
422 // Optimistic assumptions
424 canBeRelocated_ = true;
429 Address funcBegin = getOffset();
430 Address funcEntryAddr = funcBegin;
// ah iterates over instructions starting at the function entry.
432 InstrucIter ah(funcBegin, this);
434 // Test for various reasons we might just give up and die
435 if( !archCheckEntry( ah, this ) )
437 // Check for redirection to another function as first insn.
438 // XXX This is a heuristic that we should revisit; is it really
439 // a common idiom for redirection to another function? Why
440 // do we need to treat it that way? -- nate & kevin
441 if( ah.isAJumpInstruction() )
443 Address target = ah.getBranchTargetAddress();
445 parsing_printf("[%s:%u] direct jump to 0x%lx at entry point\n",
446 FILE__,__LINE__,target);
448 if(img()->isCode(target)) {
449 // Recursively parse this function
450 bindCallTarget(target,NULL);
452 // Create instrumentation point for on-demand parsing
453 p = new image_instPoint(*ah,
460 pdmod()->addUnresolvedControlFlow(p);
// Entry-jump case: this function degenerates to a single jump and is
// marked uninstrumentable with unknown return status.
463 endOffset_ = ah.peekNext();
464 instLevel_ = UNINSTRUMENTABLE;
466 retStatus_ = RS_UNKNOWN;
467 parsing_printf("[%s:%u] archCheckEntry denied parse at 0x%lx\n",
468 FILE__,__LINE__,getOffset());
472 // Some architectures (alpha) have an entry point that is
473 // not the function start; knowing where it is enables
474 // correct instrumentation.
475 archGetFuncEntryAddr(funcEntryAddr);
476 ah.setCurrentAddress(funcEntryAddr);
478 // Define entry point
479 p = new image_instPoint(funcEntryAddr,
483 funcEntries_.push_back(p);
// Stack-frame detection: record frame size and whether the frame
// pointer is pushed (savesFP_ is only ever true on x86).
487 if(tmp.isStackFramePreamble(frameSize))
489 archSetFrameSize(frameSize);
490 noStackFrame = false;
492 savesFP_ = ah.isFramePush(); // only ever true on x86
494 // Architecture-specific "do not relocate" list
497 canBeRelocated_ = false;
500 // Create or find function entry block placeholder
501 image_basicBlock *ph_entryBlock;
503 bool preParsed = false;
// If a block already covers funcBegin, reuse it (exact match) or split it
// (entry lands mid-block); otherwise create a fresh stub block.
504 if(image_->basicBlocksByRange.find(funcBegin, tmpRange))
506 ph_entryBlock = dynamic_cast<image_basicBlock *>(tmpRange);
507 if(ph_entryBlock->firstInsnOffset() == funcBegin)
509 // perfect match; this is either a 'stub' function
510 // or has already been parsed
511 if(!ph_entryBlock->isStub_)
513 // non-stub existing blocks are handled by
520 // existing block was *split* by function entry
521 ph_entryBlock = ph_entryBlock->split(funcBegin,this);
523 image_->basicBlocksByRange.insert(ph_entryBlock);
530 ph_entryBlock = new image_basicBlock(this, funcBegin);
531 ph_entryBlock->isStub_ = true;
532 image_->basicBlocksByRange.insert(ph_entryBlock);
535 // It is always safe to set these
536 ph_entryBlock->firstInsnOffset_ = funcBegin;
537 ph_entryBlock->isEntryBlock_ = true;
539 // Record entry block
540 entryBlock_ = ph_entryBlock;
541 entryBlocks_.push_back(ph_entryBlock);
543 // The "actual" entry block (alpha)
// Same find/split/create dance for the distinct alpha entry address.
544 if(funcEntryAddr != funcBegin)
546 image_basicBlock *tmpBlock;
547 if(image_->basicBlocksByRange.find(funcEntryAddr,tmpRange))
549 tmpBlock = dynamic_cast<image_basicBlock *>(tmpRange);
550 if(tmpBlock->firstInsnOffset() == funcEntryAddr)
552 tmpBlock->isEntryBlock_ = true;
556 assert(tmpBlock == ph_entryBlock);
557 tmpBlock = ph_entryBlock->split(funcEntryAddr,this);
558 tmpBlock->isEntryBlock_ = true;
563 tmpBlock = new image_basicBlock(this, funcEntryAddr);
564 tmpBlock->isStub_ = true;
565 image_->basicBlocksByRange.insert(tmpBlock);
566 tmpBlock->isEntryBlock_ = true;
568 entryBlocks_.push_back(tmpBlock);
570 // special alpha-case "multiple" entry (really multiple lookup)
571 image_->funcsByEntryAddr[funcEntryAddr] = this;
573 // End alpha-specific weirdness
575 // Note that the current function is in-progress to avoid
576 // spurious duplication for loops in the call graph
577 img()->activelyParsing[getOffset()] = this;
// Pre-parsed (shared) entry blocks bypass buildCFG; otherwise construct
// the CFG from the collected entry blocks.
581 parseSharedBlocks(ph_entryBlock);
585 buildCFG(entryBlocks_, funcBegin);
591 img()->activelyParsing.undef(getOffset());
596 /* buildCFG does the bulk of the work in parsing a function. It is
597 * responsible for following the control-flow through a function, building
598 * up the set of basic blocks that compose it, and connecting these blocks
602 * - noncontiguous blocks
603 * - shared blocks (between other functions)
604 * - previously parsed blocks
606 * Architecture Independence:
607 * All architecture-specific code is abstracted away in the image-arch
// buildCFG: worklist-driven linear-sweep CFG construction for one function.
// Seeds the worklist with the entry block address(es), then decodes each
// block instruction-by-instruction with InstrucIter, classifying control
// transfers (conditional branch, indirect jump, jump, return, conditional
// return, call, leave, abort/invalid, and IA64 alloc) and creating
// image_instPoints, edges, and successor blocks as it goes. Tracks
// previously parsed blocks to avoid re-decoding and to detect x86
// overlapping instruction streams. Sets endOffset_ and retStatus_ on exit.
// NOTE(review): many lines are elided in this view; the code is left
// byte-identical and only comments are added.
610 bool image_func::buildCFG(
611 pdvector<image_basicBlock *>& funcEntries,
615 pdvector< Address > worklist;
617 // Basic block lookup
618 BPatch_Set< Address > leaders;
619 BPatch_Set< Address > allInstAddrs;
620 dictionary_hash< Address, image_basicBlock* > leadersToBlock( addrHash );
622 // Prevent overrunning an existing basic block in linear scan
623 Address nextExistingBlockAddr;
624 image_basicBlock *nextExistingBlock = NULL;
626 // Instructions and InstPoints
627 pdvector< instruction > allInstructions;
628 unsigned numInsns = 0;
635 // ** Use to special-case abort and exit calls
636 dictionary_hash<Address, std::string> *pltFuncs = img()->getPltFuncs();
638 // ** end abort/exit setup
640 Address funcEnd = funcBegin;
641 Address currAddr = funcBegin;
643 // The reverse ordering here is to make things easier on
644 // alpha. Indeed, the only reason funcEntries would ever
645 // have size > 1 is if we're on alpha.
// --- Seed the worklist with every entry block address ---
646 for(int j=funcEntries.size()-1; j >= 0; j--)
648 Address a = funcEntries[j]->firstInsnOffset();
650 leadersToBlock[a] = funcEntries[j];
651 worklist.push_back(a);
// --- Main worklist loop: one iteration per basic-block leader ---
// (worklist grows during iteration; indexed access is deliberate)
654 for(unsigned i=0; i < worklist.size(); i++)
656 InstrucIter ah(worklist[i],this);
658 image_basicBlock* currBlk = leadersToBlock[worklist[i]];
660 parsing_printf("[%s:%d] parsing block at 0x%lx, "
661 "first insn offset 0x%lx\n",
664 currBlk->firstInsnOffset());
667 assert(currBlk->firstInsnOffset() == worklist[i]);
669 // If this function has already parsed the block, skip
670 if(containsBlock(currBlk))
// Claim the (stub) block for this function.
675 currBlk->isStub_ = false;
676 addToBlocklist(currBlk);
678 parsing_printf("- adding block %d (0x%lx) to blocklist\n",
679 currBlk->id(), currBlk->firstInsnOffset());
683 // non-stub block must have previously been parsed
684 parseSharedBlocks(currBlk, leaders, leadersToBlock, funcEnd);
688 // Remember where the next block is, so we don't blindly run over
689 // the top of it when scanning through instructions.
690 nextExistingBlockAddr = ULONG_MAX;
691 if(image_->basicBlocksByRange.successor(ah.peekNext(),tmpRange))
693 nextExistingBlock = dynamic_cast<image_basicBlock*>(tmpRange);
694 if(nextExistingBlock->firstInsnOffset_ > worklist[i])
696 nextExistingBlockAddr = nextExistingBlock->firstInsnOffset_;
// --- Inner loop: decode instructions until the block ends ---
700 while(true) // instructions in block
703 insnSize = ah.getInstruction().size();
705 // The following three cases ensure that we properly handle
706 // situations where our parsing comes across previously
// Case 1: ran into the start of an existing block -- end the current
// block here with a fallthrough edge to it.
708 if(currAddr == nextExistingBlockAddr)
710 assert(nextExistingBlock->firstInsnOffset_ == currAddr);
712 parsing_printf("rolling over existing block at 0x%lx\n",currAddr);
713 // end block with previous addr
714 // add edge to to this newly found block
715 // push it on the worklist vector
716 Address prevAddr = ah.peekPrev();
717 currBlk->lastInsnOffset_ = prevAddr;
718 currBlk->blockEndOffset_ = currAddr;
720 addEdge(currBlk,nextExistingBlock,ET_FALLTHROUGH);
722 if(!containsBlock(nextExistingBlock))
725 leadersToBlock[currAddr] = nextExistingBlock;
726 worklist.push_back(currAddr);
727 parsing_printf("- adding address 0x%lx to worklist\n",
730 // add this function to block ownership list if we're
731 // rolling over an unparsed stub of another function
732 // (i.e., the next block is the unparsed entry block
734 if(nextExistingBlock->isStub_ &&
735 !nextExistingBlock->containedIn(this))
737 nextExistingBlock->addFunc(this);
742 #if defined(arch_x86) || defined(arch_x86_64)
743 // These portions correspond only to situations that arise
744 // because of overlapping but offset instruction streams --
745 // the kind that only happen on x86.
// Case 2 (x86 only): address already decoded but not a block leader --
// instruction streams have converged mid-block.
747 else if(allInstAddrs.contains( currAddr ))
749 // This address has been seen but is not the start of
750 // a basic block. This has the following interpretation:
751 // The current instruction stream has become aligned with
752 // an existing (previously parsed) instruction stream.
753 // This should only be possible on x86, and in normal code
754 // it should only be seen when dealing with cases like
755 // instruction prefixes (where the bytes of two different
756 // versions of an instruction effectively overlap).
758 // XXX Because our codeRangeTree has no support for
759 // overlapping ranges, we are only capable of supporting the
760 // case where the current instruction stream starts at a
761 // lower value in the address space than the stream it
762 // overlaps with. This is sufficient to handle the
763 // prefixed instruction case (because of the order we
764 // place blocks on the work list), but any other case
765 // will make the function uninstrumentable.
767 if(currAddr > currBlk->firstInsnOffset_) {
768 currBlk->lastInsnOffset_ = ah.peekPrev();
769 currBlk->blockEndOffset_ = currAddr;
771 // The newly created basic block will split
772 // the existing basic block that encompasses this
774 addBasicBlock(currAddr,
781 parsing_printf(" ... uninstrumentable due to instruction stream overlap\n");
782 currBlk->lastInsnOffset_ = currAddr;
783 currBlk->blockEndOffset_ = ah.peekNext();
784 instLevel_ = UNINSTRUMENTABLE;
// Case 3 (x86 only): stepped past an existing block's start; resync the
// next-block marker and reprocess this instruction.
788 else if(currAddr > nextExistingBlockAddr)
790 // Overlapping instructions on x86 can mean that we
791 // can step over the start of an existing block because
792 // our instruction stream does not exactly coincide with its
793 // instruction stream. In that case, find the next existing
794 // block address and reprocess this instruction.
796 nextExistingBlockAddr = ULONG_MAX;
797 if(image_->basicBlocksByRange.successor(currAddr,tmpRange))
800 dynamic_cast<image_basicBlock*>(tmpRange);
802 nextExistingBlockAddr = nextExistingBlock->firstInsnOffset_;
804 continue; // reprocess current instruction
// Record the decoded instruction, then classify it below.
808 allInstructions.push_back( ah.getInstruction() );
809 allInstAddrs += currAddr;
811 // only ever true on x86
812 if(ah.isFrameSetup() && savesFP_)
814 noStackFrame = false;
817 // Special architecture-specific instruction processing
818 archInstructionProc( ah );
// --- Conditional branch: ends block; edges to target and fallthrough ---
820 if( ah.isACondBranchInstruction() )
822 currBlk->lastInsnOffset_ = currAddr;
823 currBlk->blockEndOffset_ = ah.peekNext();
825 if( currAddr >= funcEnd )
826 funcEnd = ah.peekNext();
827 //funcEnd = currAddr + insnSize;
829 Address target = ah.getBranchTargetAddress();
831 if( !img()->isValidAddress( target ) ) {
832 p = new image_instPoint(currAddr,
839 pdmod()->addUnresolvedControlFlow(p);
841 retStatus_ = RS_UNKNOWN;
846 // Skip delay slot (sparc)
850 // Process target first, to prevent overlap between
851 // the fallthrough block and the target block.
852 if( target < funcBegin )
854 parsing_printf("Parse of mod:func[%s:%s] Tying up conditional "
855 "jump target to address above the funcEntry"
856 "[0x%lx]: jump[0x%lx]=>[0x%lx]\n",
857 img()->name().c_str(),
858 prettyName().c_str(),funcBegin, currAddr, target);
860 if ( img()->isValidAddress( target ) ) {
861 addBasicBlock(target,
869 Address t2 = ah.peekNext();
871 // If this branch instruction split the current block,
872 // the fallthrough should be added to the newly created
873 // block instead of this current block.
874 image_basicBlock * real_src;
875 if(target > currBlk->firstInsnOffset_ && target <= currAddr) {
876 real_src = leadersToBlock[target];
// --- Indirect jump: jump table or indirect tail call ---
891 else if( ah.isAIndirectJumpInstruction() )
893 currBlk->lastInsnOffset_ = currAddr;
894 currBlk->blockEndOffset_ = ah.peekNext();
895 parsing_printf("... indirect jump at 0x%lx\n", currAddr);
897 if( currAddr >= funcEnd )
898 funcEnd = ah.peekNext();
900 numInsns = allInstructions.size() - 2;
901 if( numInsns == 0 ) {
902 // this "function" is a single instruction long
903 // (a jmp to the "real" function)
904 parsing_printf("... uninstrumentable due to 0 size\n");
905 instLevel_ = UNINSTRUMENTABLE;
906 //endOffset_ = startOffset_;
907 endOffset_ = getOffset();
908 p = new image_instPoint(currAddr,
915 pdmod()->addUnresolvedControlFlow(p);
917 // can't properly determine return status
918 retStatus_ = RS_UNKNOWN;
922 if( archIsIndirectTailCall(ah) )
924 // looks like a tail call
925 currBlk->isExitBlock_ = true;
926 p = new image_instPoint(currAddr,
933 pdmod()->addUnresolvedControlFlow(p);
935 // can't properly determine return status
936 retStatus_ = RS_UNKNOWN;
// Attempt jump-table resolution; failure marks the function
// HAS_BR_INDIR and non-relocatable.
940 BPatch_Set< Address > targets;
941 BPatch_Set< Address >::iterator iter;
942 if( archIsIPRelativeBranch(ah))
944 processJump(ah, currBlk,
945 funcBegin, funcEnd, allInstructions, leaders, worklist,
946 leadersToBlock, pltFuncs);
951 // get the list of targets
952 if(!archGetMultipleJumpTargets(targets,currBlk,ah,
955 instLevel_ = HAS_BR_INDIR;
956 canBeRelocated_ = false;
957 p = new image_instPoint(currAddr,
964 pdmod()->addUnresolvedControlFlow(p);
966 retStatus_ = RS_UNKNOWN;
969 iter = targets.begin();
970 bool foundBadTarget = false;
971 for(iter = targets.begin(); iter != targets.end(); iter++)
973 if( !img()->isValidAddress( *iter ) ) {
974 if (!foundBadTarget) {
975 p = new image_instPoint(currAddr,
982 pdmod()->addUnresolvedControlFlow(p);
983 foundBadTarget = true;
986 retStatus_ = RS_UNKNOWN;
990 if(*iter < funcBegin) {
991 // FIXME see cond branch note, above
992 currBlk->isExitBlock_ = true;
993 // FIXME if this stays an exit block, it needs an exit
// --- Direct unconditional jump ---
1008 else if( ah.isAJumpInstruction() )
1010 processJump(ah, currBlk,
1011 funcBegin, funcEnd, allInstructions, leaders, worklist,
1012 leadersToBlock, pltFuncs);
// --- Return: exit block, funcReturns instPoint, RS_RETURN ---
1015 else if( ah.isAReturnInstruction() )
1018 parsing_printf("... 0x%lx (%d) is a return\n", currAddr,
1019 currAddr - getOffset());
1021 currBlk->lastInsnOffset_ = currAddr;
1022 currBlk->blockEndOffset_ = ah.peekNext();
1023 currBlk->isExitBlock_ = true;
1025 if( currAddr >= funcEnd )
1026 funcEnd = ah.peekNext();
1028 parsing_printf("... making new exit point at 0x%lx\n", currAddr);
1029 p = new image_instPoint( currAddr,
1030 ah.getInstruction(),
1033 funcReturns.push_back( p );
1034 currBlk->containsRet_ = true;
1035 retStatus_ = RS_RETURN;
1037 if (ah.getInstruction().isCleaningRet()) {
1038 cleansOwnStack_ = true;
// --- Conditional return ---
1043 else if(ah.isACondReturnInstruction() )
1045 parsing_printf("cond ret branch at 0x%lx\n", currAddr);
1046 currBlk->lastInsnOffset_ = currAddr;
1047 currBlk->blockEndOffset_ = ah.peekNext();
1048 currBlk->isExitBlock_ = true;
1050 if( currAddr >= funcEnd )
1051 funcEnd = ah.peekNext();
1053 // And make an inst point
1054 p = new image_instPoint(currAddr,
1055 ah.getInstruction(),
1058 parsing_printf("Function exit at 0x%x\n", *ah);
1059 funcReturns.push_back(p);
1060 currBlk->containsRet_ = true;
1061 retStatus_ = RS_RETURN;
1063 Address t2 = ah.peekNext();
// --- Call (direct or dynamic): bind target, maybe fallthrough ---
1072 else if( ah.isACallInstruction() ||
1073 ah.isADynamicCallInstruction() )
1075 currBlk->lastInsnOffset_ = currAddr;
1076 currBlk->blockEndOffset_ = ah.peekNext();
1078 if( currAddr >= funcEnd )
1079 funcEnd = ah.peekNext();
1081 //validTarget is set to false if the call target is not a
1082 //valid address in the applications process space
1083 bool validTarget = true;
1084 //simulateJump is set to true if the call should be treated
1085 //as an unconditional branch for the purposes of parsing
1087 bool simulateJump = false;
1089 image_func *targetFunc = NULL;
1091 bool isAbsolute = false;
1092 Address target = ah.getBranchTargetAddress(&isAbsolute);
1094 if(archIsRealCall(ah, validTarget, simulateJump))
1096 if(ah.isADynamicCallInstruction()) {
1097 p = new image_instPoint( currAddr,
1098 ah.getInstruction(),
1104 pdmod()->addUnresolvedControlFlow(p);
1106 parsing_printf("[%s:%u] dynamic call 0x%lx -> ?\n",
1107 FILE__,__LINE__,currAddr);
1110 p = new image_instPoint( currAddr,
1111 ah.getInstruction(),
1117 parsing_printf("[%s:%u] binding call 0x%lx -> 0x%lx\n",
1118 FILE__,__LINE__,currAddr,target);
1120 // bindCallTarget will look up the target function;
1121 // if it does not exist, parsing will be initiated
1122 targetFunc = bindCallTarget(target,currBlk);
1124 calls.push_back( p );
1125 currBlk->containsCall_ = true;
1128 if(validTarget == false) {
1129 parsing_printf("[%s:%u] call at 0x%lx targets invalid "
1131 FILE__,__LINE__,currAddr,target);
1133 if (!img()->isCode(target)) {
1134 p = new image_instPoint(currAddr,
1135 ah.getInstruction(),
1140 pdmod()->addUnresolvedControlFlow(p);
1143 currBlk->canBeRelocated_ = false;
1144 canBeRelocated_ = false;
1146 else if(simulateJump)
1148 parsing_printf("[%s:%u] call at 0x%lx simulated as "
1150 FILE__,__LINE__,currAddr,target);
1152 addBasicBlock(target,
1161 if (ah.isDelaySlot()) {
1162 // Delay slots get skipped; effectively pinned to
// Known non-returning callees (exit/abort/etc.): no fallthrough edge.
1168 if(targetFunc && (targetFunc->symTabName() == "exit" ||
1169 targetFunc->symTabName() == "abort" ||
1170 targetFunc->symTabName() == "__f90_stop" ||
1171 targetFunc->symTabName() == "fancy_abort"))
1173 parsing_printf("Call to %s (%lx) detected at 0x%lx\n",
1174 targetFunc->symTabName().c_str(),
1177 else if((*pltFuncs).defines(target) &&
1178 ((*pltFuncs)[target] == "exit" ||
1179 (*pltFuncs)[target] == "abort" ||
1180 (*pltFuncs)[target] == "__f90_stop" ||
1181 (*pltFuncs)[target] == "fancy_abort"))
1183 parsing_printf("Call to %s (%lx) detected at 0x%lx\n",
1184 (*pltFuncs)[target].c_str(),
1187 else if(!simulateJump)
1189 // we don't wire up a fallthrough edge if we're treating
1190 // the call insruction as an unconditional branch
1192 // link up the fallthrough edge unless we know for
1193 // certain that the target function does not return,
1194 // or if the target is an entry in the PLT (and not
1195 // one of the special-case non-returning entries above)
1196 if(targetFunc && targetFunc->returnStatus() == RS_NORETURN
1197 && !(*pltFuncs).defines(target))
1199 parsing_printf("[%s:%u] not parsing past non-returning "
1200 "call at 0x%lx (to %s)\n",
1201 FILE__,__LINE__,*ah,
1202 targetFunc->symTabName().c_str());
1206 Address next = ah.peekNext();
// --- x86 'leave': restores the frame; function has a stack frame ---
1217 else if( ah.isALeaveInstruction() )
1219 noStackFrame = false;
// --- Abort/invalid instruction: terminates the block ---
1221 else if( archIsAbortOrInvalid(ah) )
1223 // some architectures have invalid instructions, and
1224 // all have special 'abort-causing' instructions
1226 currBlk->lastInsnOffset_ = currAddr;
1227 currBlk->blockEndOffset_ = ah.peekNext();
1229 if( currAddr >= funcEnd )
1230 funcEnd = ah.peekNext();
1234 #if defined(arch_ia64)
1235 else if( ah.isAnAllocInstruction() )
1237 // IA64 only, sad to say
1238 if( currAddr == currBlk->firstInsnOffset() )
1240 allocs.push_back( currAddr );
// alloc mid-block: end the block before it and start a new block at the
// alloc, backing out the bookkeeping for the current instruction.
1244 currBlk->lastInsnOffset_ = ah.peekPrev();
1245 currBlk->blockEndOffset_ = currAddr;
1247 // remove some cumulative information
1248 allInstAddrs.remove(currAddr);
1249 allInstructions.pop_back();
1251 addBasicBlock(currAddr,
// --- Next address leaves the mapped text: force-end the block ---
1261 if (!img()->isValidAddress(ah.peekNext())) {
1262 //The next instruction is not in the.text segment. We'll
1263 // abort this basic block as if it were terminating with
1264 // an illegal instruction.
1265 parsing_printf("Next instruction is invalid, ending basic block\n");
1267 currBlk->lastInsnOffset_ = currAddr;
1268 currBlk->blockEndOffset_ = ah.peekNext();
1270 if( currAddr >= funcEnd )
1271 funcEnd = ah.peekNext();
1272 parsing_printf("... making new exit point at 0x%lx\n", currAddr);
1273 p = new image_instPoint( currAddr,
1274 ah.getInstruction(),
1277 funcReturns.push_back( p );
// --- Wrap-up: final extent and return status ---
1284 endOffset_ = funcEnd;
1286 // If the status hasn't been updated to UNKNOWN or RETURN by now,
1287 // set it to RS_NORETURN
1288 if(retStatus_ == RS_UNSET)
1289 retStatus_ = RS_NORETURN;
1294 /* bindCallTarget links the target address of a call to the function
1295 * and entry block to which it refers. If such a function/block pair
1296 * does not exist (or if the target is the middle of another function),
1297 * a new function will be created and parsed.
1299 * Returns: pointer to the [parsed] function object that is target of call
1302 * - May split existing blocks
1303 * - May mark existing blocks as shared
1304 * - May recursively parse one or more functions
1306 * This function can be called with NULL currBlk as a shorthand for
1307 * finding or recursively parsing the target
1309 image_func * image_func::bindCallTarget(
1311 image_basicBlock* currBlk)
// tmpRange receives any already-known code range covering `target';
// ph_callTarget is that range downcast to a basic block (NULL until found).
1313 codeRange *tmpRange;
1314 image_basicBlock *ph_callTarget = NULL;
1315 image_basicBlock *newBlk;
1316 image_func *targetFunc;
1317 bool created = false;
1319 // targetfunc may be parsed or unparsed, and it may not yet have
1320 // an entry basic block associated with it. `created' indicates
1321 // whether a new image_func was created
1322 targetFunc = FindOrCreateFunc(target,FS_RT,created);
// Probe the image-wide block range map for a block containing the
// call target address.
1324 if(image_->basicBlocksByRange.find(target,tmpRange))
1327 dynamic_cast<image_basicBlock*>(tmpRange);
// Exact match: `target' is the first instruction of an existing block,
// so the call edge can be wired directly to it.
1330 if(ph_callTarget && ph_callTarget->firstInsnOffset_ == target )
1333 addEdge(currBlk,ph_callTarget,ET_CALL);
1336 // going to need a new basic block
1337 newBlk = new image_basicBlock(targetFunc,target);
1340 // The target lies within an existing block, which
1341 // must therefore be split
1342 ph_callTarget->split(newBlk);
// NOTE(review): stub marking appears to apply when no existing block
// covers the target; the stub is filled in when the callee is parsed
// (elided branch structure — confirm against full source).
1345 newBlk->isStub_ = true;
1347 image_->basicBlocksByRange.insert(newBlk);
1350 addEdge(currBlk,newBlk,ET_CALL);
1353 // Now parse the function, if necessary
1354 if(!targetFunc->parsed()) {
// The target must lie in a code region; FindOrCreateFunc should never
// hand back a function outside one.
1355 assert( targetFunc->img()->isCode(targetFunc->getOffset()) );
1357 parsing_printf("[%s:%u] recursive parsing of call target at 0x%lx\n",
1358 FILE__,__LINE__,targetFunc->getOffset());
// Recursively parse the callee; on success, register it with its image.
1360 if(targetFunc->parse()) {
1361 targetFunc->img()->recordFunction(targetFunc);
1363 parsing_printf("[%s:%u] recursive parsing of 0x%lx complete\n",
1364 FILE__,__LINE__,targetFunc->getOffset());
1366 parsing_printf("[%s:%u] recursive parsing of 0x%lx failed\n",
1367 FILE__,__LINE__,targetFunc->getOffset());
1369 /* XXX Because of the enormous amount of references to functions
1370 created during parsing, it is not currently safe to
1371 delete image_func objects once they have been partially
1372 parsed. This needs a design-level fix.
1373 // only want to delete this function if FindOrCreateFunc
1374 // actually created one
1387 * Look up or create a new function for this image at a given address,
1388 * depending on whether one exists in the symbol table (or was previously
1391 * Returns: pointer to an existing or new function
1393 image_func * image_func::FindOrCreateFunc(Address target,
1397 image_func *targetFunc;
// Fast path: a function whose entry point is exactly `target' is
// already known to the image.
1399 if(image_->funcsByEntryAddr.defines(target))
1401 targetFunc = image_->funcsByEntryAddr[target];
// A function at `target' may be mid-parse (recursive parsing);
// reuse that in-flight object rather than creating a duplicate.
1405 if(img()->activelyParsing.defines(target))
1406 targetFunc = img()->activelyParsing[target];
// No existing function: synthesize a name of the form "targ<hex addr>".
// Windows lacks C99 snprintf, hence the _snprintf variant.
1409 #if defined (os_windows)
1410 _snprintf(name,32,"targ%lx",target);
1412 snprintf(name,32,"targ%lx",target);
// Create the new function object; UINT_MAX marks its size as unknown
// until parsing determines the real extent.
1414 targetFunc = new image_func(name,target,UINT_MAX,mod_,image_,src);
1422 /* parseSharedBlocks handles the case where a reachable path of blocks
1423 * in a function have already been parsed by another function. All blocks
1424 * reachable from firstBlock will be added to the current function, and
1425 * will be updated to reflect their shared status.
1427 * Call edges are, as in normal parsing, not followed.
1429 * Side effects: (many)
1431 * - Adds to leadersToBlock
1432 * - May update other functions' "shared" status
1435 void image_func::parseSharedBlocks(image_basicBlock * firstBlock,
1436 BPatch_Set< Address > &leaders,
1437 dictionary_hash< Address, image_basicBlock * > &leadersToBlock,
// WL is the breadth-first worklist of pre-parsed blocks to absorb;
// `visited' prevents re-queuing blocks in cyclic control flow.
1440 pdvector< image_basicBlock * > WL;
1441 pdvector< image_edge * > targets;
1443 BPatch_Set< image_basicBlock * > visited;
1445 image_basicBlock * curBlk;
1446 image_basicBlock * child;
// Scratch pointers for copying existing instrumentation points into
// this function.
1448 image_instPoint * tmpInstPt;
1449 image_instPoint * cpyInstPt;
// Seed the traversal with the first shared block.
1451 WL.push_back(firstBlock);
1452 visited.insert(firstBlock);
1454 parsing_printf("[%s:%u] Parsing shared code at 0x%lx, startoffset: "
1455 "0x%lx endoffset: 0x%lx\n",
1456 FILE__,__LINE__,firstBlock->firstInsnOffset_, getOffset(), endOffset_);
1458 // remember that we have shared blocks
1459 containsSharedBlocks_ = true;
1461 // assume that return status is UNKNOWN because we do not do a detailed
1462 // parse of the code. This assumption leads to the greatest code
1464 if(retStatus_ == RS_UNSET)
1465 retStatus_ = RS_UNKNOWN;
1467 // There are several cases that can lead to a pre-parsed block
1468 // having the current function (this) on its funcs_ list:
1470 // 1. The shared block is the result of splitting a block in another
1471 // function (current function added to funcs_ in block creation)
1473 // 2. The shared block was created by a call operation to an unparsed
1474 // function *and then subsequently parsed prior to parsing the
1475 // function for which it is the entry block* (whew). In that case
1476 // the entire "called" function is pre-parsed, but it is already
1477 // on the funcs_ list of the *first* block (bound up in
1480 // In both cases, 'this' will always be the first function on the funcs_
1481 // list, if it is there at all, so we always check whether funcs_[0] ==
1482 // this prior to adding this to the funcs_ list.
// Drain the worklist, claiming each reachable pre-parsed block.
1485 while(WL.size() > 0)
// Skip blocks this function already owns.
1490 if(containsBlock(curBlk))
1493 // If the block isn't shared, it only has one function that doesn't
1494 // yet know that it contains shared code.
1495 if(!curBlk->isShared_)
// Tell the sole prior owner that it now shares code (and so must be
// relocated at instrumentation time).
1497 image_func * first = *curBlk->funcs_.begin();
1498 first->containsSharedBlocks_ = true;
1499 first->needsRelocation_ = true;
1502 // If this block is a stub, we'll revisit it when parsing is done
1503 if(curBlk->isStub_) {
1504 img()->reparse_shared.push_back(
1505 pair<image_basicBlock *, image_func *>(curBlk,this));
1506 parsing_printf("XXXX posponing stub block %d (0x%lx) for later\n",
1507 curBlk->id(),curBlk->firstInsnOffset_);
1511 /* Copy instrumentation points (if any) to this function that
1512 will incorporate the shared code.
1515 Given parsing errors that are possible given misparsed
1516 control flow (e.g., badly parsed indirect control flow,
1517 non-returning functions, and opaque conditional branches),
1518 it is possible for misaligned blocks to overlap &
1519 include the same instpoint. This is /not safe/ and should
1520 be addressed in a new, overlap-friendly parser, but
1521 in the meanwhile it is imperative that instpoints are not
1522 duplicated in these [rare] cases.
1525 For the moment we are not implementing a check before
1526 copy to continue to exercise parsing bugs.
1529 // note that these have to occur before this function
1530 // is added to the block's ownership list
// Clone the block's call point (if any) into this function's `calls' list.
1531 if((tmpInstPt = curBlk->getCallInstPoint()) != NULL)
1533 parsing_printf("... copying call point at 0x%lx\n", tmpInstPt->offset());
1534 cpyInstPt = new image_instPoint(tmpInstPt->offset(),
1537 tmpInstPt->callTarget(),
1538 tmpInstPt->isDynamic());
1539 calls.push_back(cpyInstPt);
// Clone the block's return/exit point (if any); seeing one means this
// function can return.
1541 if((tmpInstPt = curBlk->getRetInstPoint()) != NULL)
1543 parsing_printf("... copying exit point at 0x%lx\n", tmpInstPt->offset());
1544 cpyInstPt = new image_instPoint(tmpInstPt->offset(),
1547 tmpInstPt->getPointType());
1548 funcReturns.push_back(cpyInstPt);
1550 retStatus_ = RS_RETURN;
1553 // As described in the comment above, the
1554 // only cases in which 'this' could be on the block's funcs_ list
1555 // are when it is the first entry.
1556 if(*curBlk->funcs_.begin() != this) {
1557 curBlk->addFunc(this);
// Claim the block for this function and extend the recorded
// function end past it if necessary.
1561 addToBlocklist(curBlk);
1562 parsing_printf("XXXX adding pre-parsed block %d (0x%lx) to blocklist\n",
1563 curBlk->id(),curBlk->firstInsnOffset_);
1564 // update "function end"
1565 if(funcEnd < curBlk->blockEndOffset_)
1566 funcEnd = curBlk->blockEndOffset_;
// Enqueue unvisited intra-procedural successors; call edges are not
// followed (matching normal parsing).
1569 curBlk->getTargets(targets);
1571 for(unsigned int i=0;i<targets.size();i++)
1573 child = targets[i]->getTarget();
1575 if(targets[i]->getType() != ET_CALL && !visited.contains(child))
// Record the successor as a block leader for the caller's bookkeeping.
1577 leaders += child->firstInsnOffset_;
1578 leadersToBlock[ child->firstInsnOffset_ ] = child;
1579 WL.push_back(child);
1580 visited.insert(child);
1584 #if defined (cap_use_pdvector)
1593 /* A specialized version for parsing shared blocks when we don't care
1594 * to save any information about the blocks that were visited (that is,
1595 * when the entire function is known to be shared and so no other normal
1596 * parsing is taking place).
1598 void image_func::parseSharedBlocks(image_basicBlock * firstBlock)
// Throwaway containers: this overload does not need the leader
// bookkeeping the four-argument version maintains for its caller.
1600 BPatch_Set< Address > leaders;
1601 dictionary_hash< Address, image_basicBlock * > leadersToBlock( addrHash );
// Start with a zero-length extent at the entry; the shared-block walk
// extends it as blocks are claimed.
1603 endOffset_ = getOffset();
1605 parseSharedBlocks(firstBlock,
1606 leaders, /* unused */
1607 leadersToBlock, /* unused */
1611 void image_func::processJump(InstrucIter& ah,
1612 image_basicBlock* currBlk,
1615 pdvector<instruction>& allInstructions,
1616 BPatch_Set<Address>& leaders,
1617 pdvector<Address>& worklist,
1618 dictionary_hash<Address, image_basicBlock*>& leadersToBlock,
1619 dictionary_hash<Address, std::string> *pltFuncs
1622 Address currAddr = *ah;
1625 currBlk->lastInsnOffset_ = currAddr;
1626 currBlk->blockEndOffset_ = ah.peekNext();
1628 if( currAddr >= funcEnd )
1629 funcEnd = ah.peekNext();
1631 Address target = ah.getBranchTargetAddress();
1633 parsing_printf("... 0x%lx is a jump to 0x%lx\n",
1639 if(archProcExceptionBlock(catchStart,ah.peekNext()))
1641 addBasicBlock(catchStart,
1649 if( !img()->isValidAddress( target ) ) {
1650 p = new image_instPoint(currAddr,
1651 ah.getInstruction(),
1657 pdmod()->addUnresolvedControlFlow(p);
1659 retStatus_ = RS_UNKNOWN;
1663 if(archIsATailCall( ah, allInstructions ) ||
1664 (*pltFuncs).defines(target))
1666 // XXX this output is for testing only; remove
1667 if((*pltFuncs).defines(target)) {
1668 parsing_printf("[%s:%u] tail call into plt %lx -> %lx\n",
1669 FILE__,__LINE__,*ah,target);
1672 // Only on x86 & sparc currently
1673 currBlk->isExitBlock_ = true;
1675 p = new image_instPoint(currAddr,
1676 ah.getInstruction(),
1682 funcReturns.push_back( p );
1683 currBlk->containsRet_ = true;
1685 parsing_printf("[%s:%u] tail call 0x%lx -> 0x%lx\n",
1686 FILE__,__LINE__,*ah,target);
1687 image_func *targetFunc = bindCallTarget(target,currBlk);
1689 // Functions that make tail calls inherit the return
1690 // status of their call targets
1691 if(retStatus_ == RS_UNSET &&
1692 targetFunc->returnStatus() != RS_NORETURN)
1694 retStatus_ = targetFunc->returnStatus();
1697 parsing_printf("[%s:%u] unparseable tail call at 0x%lx",
1698 FILE__,__LINE__,*ah);
1699 retStatus_ = RS_UNKNOWN;
1704 if( target < funcBegin ) {
1705 parsing_printf("Parse of mod:func[%s:%s] Tying up jump target "
1706 "to address above the funcEntry[0x%lx]: "
1707 "jump[0x%lx]=>[0x%lx]\n", img()->name().c_str(),
1708 prettyName().c_str(),funcBegin, currAddr, target);
1710 parsing_printf("... tying up unconditional branch target\n");
1712 addBasicBlock(target,