Mark CodeObjects unparseable when no valid code regions exist
[dyninst.git] / parseAPI / src / Parser.C
1 /*
2  * Copyright (c) 1996-2009 Barton P. Miller
3  * 
4  * We provide the Paradyn Parallel Performance Tools (below
5  * described as "Paradyn") on an AS IS basis, and do not warrant its
6  * validity or performance.  We reserve the right to update, modify,
7  * or discontinue this software at any time.  We shall have no
8  * obligation to supply such updates or modifications or any other
9  * form of support to you.
10  * 
11  * By your use of Paradyn, you understand and agree that we (or any
12  * other person or entity with proprietary rights in Paradyn) are
13  * under no obligation to provide either maintenance services,
14  * update services, notices of latent defects, or correction of
15  * defects for Paradyn.
16  * 
17  * This library is free software; you can redistribute it and/or
18  * modify it under the terms of the GNU Lesser General Public
19  * License as published by the Free Software Foundation; either
20  * version 2.1 of the License, or (at your option) any later version.
21  * 
22  * This library is distributed in the hope that it will be useful,
23  * but WITHOUT ANY WARRANTY; without even the implied warranty of
24  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
25  * Lesser General Public License for more details.
26  * 
27  * You should have received a copy of the GNU Lesser General Public
28  * License along with this library; if not, write to the Free Software
29  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
30  */
31
32 #include <vector>
33 #include <limits>
34
35 #include "dyntypes.h"
36
37 #include "CodeObject.h"
38 #include "CFGFactory.h"
39 #include "ParseCallback.h"
40 #include "Parser.h"
41 #include "CFG.h"
42 #include "util.h"
43 #include "debug_parse.h"
44
45 #if defined(os_solaris)
46 // XXX old GCC versions on solaris cause problems with
47 //     boost at Wisconsin.
48 #define BOOST_NO_TEMPLATE_PARTIAL_SPECIALIZATION
49 #include <boost/tuple/tuple.hpp>
50 #undef BOOST_NO_TEMPLATE_PARTIAL_SPECIALIZATION
51 #else
52 #include <boost/tuple/tuple.hpp>
53 #endif
54
55 using namespace std;
56 using namespace Dyninst;
57 using namespace Dyninst::ParseAPI;
58 #if defined(cap_instruction_api)
59 using namespace Dyninst::InstructionAPI;
60 #endif
61
62 typedef std::pair< Address, EdgeTypeEnum > edge_pair_t;
63 typedef vector< edge_pair_t > Edges_t;
64
65
66 namespace {
67     struct less_cr {
68      bool operator()(CodeRegion * x, CodeRegion * y) 
69      { 
70          return x->offset() < y->offset();
71      }
72     };
73 }
74
75 Parser::Parser(CodeObject & obj, CFGFactory & fact, ParseCallback & pcb) :
76     _obj(obj),
77     _cfgfact(fact),
78     _pcb(pcb),
79     _parse_data(NULL),
80     _sink(NULL),
81     _parse_state(UNPARSED),
82     _in_parse(false),
83     _in_finalize(false)
84 {
85     // cache plt entries for fast lookup
86     const map<Address, string> & lm = obj.cs()->linkage();
87     map<Address, string>::const_iterator lit = lm.begin();
88     for( ; lit != lm.end(); ++lit) {
89         parsing_printf("Cached PLT entry %s (%lx)\n",lit->second.c_str(),lit->first);
90         plt_entries[lit->first] = lit->second;
91     }
92
93     if(obj.cs()->regions().empty()) {
94         parsing_printf("[%s:%d] CodeSource provides no CodeRegions"
95                        " -- unparesable\n",
96             FILE__,__LINE__);
97         _parse_state = UNPARSEABLE;
98         return;
99     }
100
101     // check whether regions overlap
102     vector<CodeRegion *> const& regs = obj.cs()->regions();
103     vector<CodeRegion *> copy(regs.begin(),regs.end());
104     sort(copy.begin(),copy.end(),less_cr());
105
106     // allocate a sink block -- region is arbitrary
107     _sink = _cfgfact.mksink(&_obj,copy[0]);
108
109     bool overlap = false;
110     CodeRegion * prev = copy[0], *cur = NULL;
111     for(unsigned i=1;i<copy.size();++i) {
112         cur = copy[i];
113         if(cur->offset() < prev->offset() + prev->length()) {
114             parsing_printf("Overlapping code regions [%lx,%lx) and [%lx,%lx)\n",
115                 prev->offset(),prev->offset()+prev->length(),
116                 cur->offset(),cur->offset()+cur->length());
117             overlap = true;
118             break;
119         }
120     }
121
122     if(overlap)
123         _parse_data = new OverlappingParseData(this,copy);
124     else
125         _parse_data = new StandardParseData(this);
126 }
127
128 ParseFrame::~ParseFrame()
129 {
130     // no locally allocated storage
131 }
132
133 Parser::~Parser()
134 {
135     if(_parse_data)
136         delete _parse_data;
137
138     vector<ParseFrame *>::iterator fit = frames.begin();
139     for( ; fit != frames.end(); ++fit) 
140         delete *fit;
141     frames.clear();
142 }
143
144 void
145 Parser::add_hint(Function * f)
146 {
147     if(!_parse_data->findFunc(f->region(),f->addr()))
148         record_func(f);
149 }
150
151 void
152 Parser::parse()
153 {
154     parsing_printf("[%s:%d] parse() called on Parser with state %d\n",
155         FILE__,__LINE__,_parse_state);
156
157     if(_parse_state == UNPARSEABLE)
158         return;
159
160     assert(!_in_parse);
161     _in_parse = true;
162
163     parse_vanilla();
164
165     // anything else by default...?
166
167     if(_parse_state < COMPLETE)
168         _parse_state = COMPLETE;
169     
170     _in_parse = false;
171 }
172
173 void
174 Parser::parse_at(
175     CodeRegion * region,
176     Address target,
177     bool recursive,
178     FuncSource src)
179 {
180     Function *f;
181     ParseFrame *pf;
182     vector<ParseFrame *> work;
183
184     parsing_printf("[%s:%d] entered parse_at([%lx,%lx),%lx)\n",
185         FILE__,__LINE__,region->low(),region->high(),target);
186
187     if(!region->contains(target)) {
188         parsing_printf("\tbad address, bailing\n");
189         return;
190     }
191
192     if(_parse_state < PARTIAL)
193         _parse_state = PARTIAL;
194
195     f = _parse_data->get_func(region,target,src);
196     if(!f) {
197         parsing_printf("   could not create function at %lx\n",target);
198         return;
199     }
200
201     ParseFrame::Status exist = _parse_data->frameStatus(region,target);
202     if(exist != ParseFrame::BAD_LOOKUP && exist != ParseFrame::UNPARSED) {
203         parsing_printf("   function at %lx already parsed, status %d\n",
204             target, exist);
205         return;
206     }
207
208     if(!(pf = _parse_data->findFrame(region,target))) {
209         pf = new ParseFrame(f,_parse_data);
210         init_frame(*pf);
211         frames.push_back(pf);
212         _parse_data->record_frame(pf);
213     } 
214
215     work.push_back(pf);
216     parse_frames(work,recursive);
217
218     // downgrade state if necessary
219     if(_parse_state > COMPLETE)
220         _parse_state = COMPLETE;
221
222 }
223
224 void
225 Parser::parse_at(Address target, bool recursive, FuncSource src)
226 {
227     CodeRegion * region = NULL;
228
229     parsing_printf("[%s:%d] entered parse_at(%lx)\n",FILE__,__LINE__,target);
230
231     if(_parse_state == UNPARSEABLE)
232         return;
233
234     StandardParseData * spd = dynamic_cast<StandardParseData *>(_parse_data);
235     if(!spd) {
236         parsing_printf("   parse_at is invalid on overlapping regions\n");
237         return;
238     }
239
240     region = spd->reglookup(region,target); // input region ignored for SPD
241     if(!region) {
242         parsing_printf("   failed region lookup at %lx\n",target);
243         return;
244     }
245
246     parse_at(region,target,recursive,src);
247 }
248
249 void
250 Parser::parse_vanilla()
251 {
252     ParseFrame *pf;
253     vector<ParseFrame *> work;
254     vector<Function *>::iterator fit;
255
256     parsing_printf("[%s:%d] entered parse_vanilla()\n",FILE__,__LINE__);
257     parsing_printf("\t%d function hints\n",hint_funcs.size());
258
259     if(_parse_state < PARTIAL)
260         _parse_state = PARTIAL;
261     else
262         parsing_printf("\tparse state is %d, some parsing already done\n",
263             _parse_state);
264
265     /* Initialize parse frames from hints */
266     for(fit=hint_funcs.begin();fit!=hint_funcs.end();++fit) {
267         Function * hf = *fit;
268         ParseFrame::Status test = frame_status(hf->region(),hf->addr());
269         if(test != ParseFrame::BAD_LOOKUP &&
270            test != ParseFrame::UNPARSED)
271         {
272             parsing_printf("\tskipping repeat parse of %lx [%s]\n",
273                 hf->addr(),hf->name().c_str());
274             continue;
275         }
276
277         pf = new ParseFrame(hf,_parse_data);
278         init_frame(*pf);
279         frames.push_back(pf);
280         work.push_back(pf);
281         _parse_data->record_frame(pf);
282     }
283
284     parse_frames(work,true);
285 }
286
287 void
288 Parser::parse_edges( vector< ParseWorkElem * > & work_elems )
289 {
290     if(_parse_state == UNPARSEABLE)
291         return;
292
293     for (unsigned idx=0; idx < work_elems.size(); idx++) {
294
295         ParseWorkElem *elem = work_elems[idx];
296         Block *src = elem->edge()->src();
297         ParseFrame *frame = _parse_data->findFrame
298             ( src->region(), 
299               src->lastInsnAddr() );
300         frame->pushWork(elem);
301
302         if(_parse_state < PARTIAL)
303             _parse_state = PARTIAL;
304         _in_parse = true;
305
306         vector< ParseFrame* > frames;
307         frames.push_back(frame);
308         parse_frames( frames, true );
309
310         if(_parse_state > COMPLETE)
311             _parse_state = COMPLETE;
312         _in_parse = false;
313
314     }
315
316     finalize();
317
318 }
319
320 void
321 Parser::parse_frames(vector<ParseFrame *> & work, bool recursive)
322 {
323     ParseFrame * pf;
324
325     /* Recursive traversal parsing */ 
326     while(!work.empty()) {
327         pf = work.back();
328         work.pop_back();
329
330         if(pf->status() == ParseFrame::PARSED)
331             continue;
332
333         parse_frame(*pf,recursive);
334         switch(pf->status()) {
335             case ParseFrame::CALL_BLOCKED: {
336                 parsing_printf("[%s] frame %lx blocked at %lx\n",
337                     FILE__,pf->func->addr(),pf->curAddr);
338
339                 assert(pf->call_target);        
340
341                 parsing_printf("    call target %lx\n",pf->call_target->addr());
342                 work.push_back(pf);
343     
344                 CodeRegion * cr = pf->call_target->region();
345                 Address targ = pf->call_target->addr();
346
347                 ParseFrame * tf = _parse_data->findFrame(cr,targ);
348                 if(!tf)
349                 {
350                     // sanity
351                     if(_parse_data->frameStatus(cr,targ) == ParseFrame::PARSED)
352                         assert(0);
353
354                     tf = new ParseFrame(pf->call_target,_parse_data);
355                     init_frame(*tf);
356                     frames.push_back(tf);
357                     _parse_data->record_frame(tf);
358                 }
359                 if(likely(recursive))
360                     work.push_back(tf);
361                 else {
362                     assert(0);
363                     // XXX should never get here
364                     //parsing_printf("    recursive parsing disabled\n");
365                 }
366                 break;
367             }
368             case ParseFrame::PARSED:
369                 parsing_printf("[%s] frame %lx complete, return status: %d\n",
370                     FILE__,pf->func->addr(),pf->func->_rs);
371                 pf->cleanup();
372                 break;
373             case ParseFrame::FRAME_ERROR:
374                 parsing_printf("[%s] frame %lx error at %lx\n",
375                     FILE__,pf->func->addr(),pf->curAddr);
376                 break;
377             default:
378                 assert(0 && "invalid parse frame status");
379         }
380     }
381
382     for(unsigned i=0;i<frames.size();++i) {
383         _parse_data->remove_frame(frames[i]);
384         delete frames[i];
385     }
386     frames.clear();
387 }
388
389 /* Finalizing all functions for consumption:
390   
391    - Finish delayed parsing
392    - Prepare and record FuncExtents for range-based lookup
393 */
394 void
395 Parser::finalize(Function *f)
396 {
397     if(f->_cache_valid) {
398         if (f->obj()->defensiveMode()) {
399             printf("WARNING: finalizing func at %lx with cache_valid == true"
400                    " %s[%d]\n", f->addr(),FILE__,__LINE__);
401         }
402         return;
403     }
404
405     if(!f->_parsed) {
406         parsing_printf("[%s:%d] Parser::finalize(f[%lx]) "
407                        "forced parsing\n",
408             FILE__,__LINE__,f->addr());
409         parse();
410     }
411
412     parsing_printf("[%s] finalizing %s (%lx)\n",
413         FILE__,f->name().c_str(),f->addr());
414
415     region_data * rd = _parse_data->findRegion(f->region());
416     assert(rd);
417
418     // finish delayed parsing and sorting
419     vector<Block*> const& blocks = f->blocks_int();
420
421     // if this is the first time we've parsed in this function, trigger
422     // new function callback
423     if (0 == f->_extents.size()) {
424         _pcb.newfunction_retstatus( f );
425     }
426     
427     // extents
428     _parse_data->remove_extents(f->_extents);
429     f->_extents.clear();
430
431     if(blocks.empty()) {
432         f->_cache_valid = true;
433         return;
434     }
435     
436     vector<Block*>::const_iterator bit = blocks.begin();
437     FuncExtent * ext = NULL;
438     Address ext_s = (*bit)->start();
439     Address ext_e = ext_s;
440
441     for( ; bit != blocks.end(); ++bit) {
442         Block * b = *bit;
443         if(b->start() > ext_e) {
444             ext = new FuncExtent(f,ext_s,ext_e);
445             parsing_printf("%lx extent [%lx,%lx)\n",f->addr(),ext_s,ext_e);
446             f->_extents.push_back(ext);
447             rd->funcsByRange.insert(ext);
448             ext_s = b->start();
449         }
450         ext_e = b->end();
451     }
452     ext = new FuncExtent(f,ext_s,ext_e);
453     parsing_printf("%lx extent [%lx,%lx)\n",f->addr(),ext_s,ext_e);
454     rd->funcsByRange.insert(ext);
455     f->_extents.push_back(ext);
456
457     f->_cache_valid = true;
458 }
459
460 void
461 Parser::finalize()
462 {
463     assert(!_in_finalize);
464     _in_finalize = true;
465
466     if(_parse_state < FINALIZED) {
467         finalize_funcs(hint_funcs);
468         finalize_funcs(discover_funcs);
469         _parse_state = FINALIZED;
470     }
471
472     _in_finalize = false;
473 }
474
475 void
476 Parser::finalize_funcs(vector<Function *> &funcs)
477 {
478     vector<Function *>::iterator fit = funcs.begin();
479     for( ; fit != funcs.end(); ++fit) {
480         finalize(*fit);
481     } 
482 }
483
484 void
485 Parser::record_func(Function *f) {
486     if(!f) return;
487
488     if(f->src() == HINT)
489         hint_funcs.push_back(f);
490     else
491         discover_funcs.push_back(f);
492
493     sorted_funcs.insert(f);
494
495     _parse_data->record_func(f);
496 }
497
498
499 void
500 Parser::init_frame(ParseFrame & frame)
501 {
502     Block * b;
503     Block * split = NULL;
504
505     // Find or create a block
506     b = block_at(frame.func, frame.func->addr(),split);
507     if(b) {
508         frame.leadersToBlock[frame.func->addr()] = b;
509         frame.func->_entry = b;
510         frame.seed = new ParseWorkElem(NULL,NULL,frame.func->addr(),true,false);
511         frame.pushWork(frame.seed);
512     } else {
513         parsing_printf("[%s] failed to initialize parsing frame\n",
514             FILE__);
515         return;
516     }
517
518     // FIXME these operations should move into the actual parsing
519     Address ia_start = frame.func->addr();
520     unsigned size = 
521      frame.codereg->offset() + frame.codereg->length() - ia_start;
522 #if defined(cap_instruction_api)
523     const unsigned char* bufferBegin =
524      (const unsigned char *)(frame.func->isrc()->getPtrToInstruction(ia_start));
525     InstructionDecoder dec(bufferBegin,size,frame.codereg->getArch());
526     InstructionAdapter_t ah(dec, ia_start, frame.func->obj(),
527         frame.codereg, frame.func->isrc());
528 #else
529     InstrucIter iter(ia_start, size, frame.func->isrc());
530     InstructionAdapter_t ah(iter, 
531         frame.func->obj(),
532         frame.codereg,
533         frame.func->isrc());
534 #endif
535     if(ah.isStackFramePreamble())
536         frame.func->_no_stack_frame = false;
537     frame.func->_saves_fp = ah.savesFP();
538 }
539
540 void ParseFrame::cleanup()
541 {
542     for(unsigned i=0;i<work_bundles.size();++i)
543         delete work_bundles[i];
544 }
545
546 namespace {
547     inline Edge * bundle_call_edge(ParseWorkBundle * b)
548     {
549         if(!b) return NULL;
550
551         vector<ParseWorkElem*> const& elems = b->elems();
552         vector<ParseWorkElem*>::const_iterator it = elems.begin();
553         for( ; it != elems.end(); ++it) {
554             if((*it)->edge()->type() == CALL)
555                 return (*it)->edge();
556         }
557         return NULL;
558     }
559
560     /* 
561      * Look up the next block for detection of straight-line
562      * fallthrough edges into existing blocks.
563      */
564     inline std::pair<Address, Block*> get_next_block(
565         ParseFrame &frame, 
566         ParseData * _parse_data)
567     {
568         Block * nextBlock = NULL;
569         Address nextBlockAddr;
570
571         nextBlockAddr = numeric_limits<Address>::max();
572         region_data * rd = _parse_data->findRegion(frame.codereg);
573
574         if((nextBlock = rd->blocksByRange.successor(frame.curAddr)) &&
575            nextBlock->start() > frame.curAddr)
576         {
577             nextBlockAddr = nextBlock->start();   
578         }
579
580         return std::pair<Address,Block*>(nextBlockAddr,nextBlock);
581     }
582 }
583
584 void
585 Parser::parse_frame(ParseFrame & frame, bool recursive) {
586     /** Persistent intermediate state **/
587     ParseFrame::worklist_t & worklist = frame.worklist;
588     dyn_hash_map<Address, Block *> & leadersToBlock = frame.leadersToBlock;
589     Address & curAddr = frame.curAddr;
590     Function * func = frame.func;
591     dyn_hash_map<Address, bool> & visited = frame.visited;
592     unsigned & num_insns = frame.num_insns;
593     func->_cache_valid = false;
594
595     /** Non-persistent intermediate state **/
596     Address nextBlockAddr;
597     Block * nextBlock;
598
599     if(frame.status() == ParseFrame::UNPARSED) {
600         parsing_printf("[%s] ==== starting to parse frame %lx ====\n",
601             FILE__,frame.func->addr());
602         // prevents recursion of parsing
603         frame.func->_parsed = true;
604     } else {
605         parsing_printf("[%s] ==== resuming parse of frame %lx ====\n",
606             FILE__,frame.func->addr());
607     }
608
609     frame.set_status(ParseFrame::PROGRESS);
610
611     while(!worklist.empty()) {
612         Block * cur = NULL;
613         ParseWorkElem * work = frame.popWork();
614
615         if(work->order() == ParseWorkElem::call) {
616             Function * ct = NULL;
617             Edge * ce = NULL;
618
619             if(!work->callproc()) {
620                 parsing_printf("[%s] binding call %lx->%lx\n",
621                     FILE__,work->edge()->src()->lastInsnAddr(),work->target());
622             
623                 pair<Function*,Edge*> ctp =
624                     bind_call(frame,
625                               work->target(),
626                               work->edge()->src(),
627                               work->edge());
628                 ct = ctp.first;
629                 ce = ctp.second;
630
631                 work->mark_call();
632             } else {
633                 ct = _parse_data->findFunc(frame.codereg,work->target());
634                 ce = work->edge();
635             }
636
637             if(recursive && ct &&
638                (frame_status(ct->region(),ct->addr())==ParseFrame::UNPARSED || 
639                 frame_status(ct->region(),ct->addr())==ParseFrame::BAD_LOOKUP))
640             {
641                 // suspend this frame and parse the next
642                 parsing_printf("    [suspend frame %lx]\n", func->addr()); 
643                 frame.call_target = ct;
644                 frame.set_status(ParseFrame::CALL_BLOCKED);
645                 // need to re-visit this edge
646                 frame.pushWork(work);
647                 return;
648             }
649             else if(ct && work->tailcall()) {
650                if (ct->_rs == UNSET) {
651                   // Ah helll....
652                 frame.call_target = ct;
653                 frame.set_status(ParseFrame::CALL_BLOCKED);
654                 // need to re-visit this edge
655                 frame.pushWork(work);
656                 return;
657                }                  
658
659                 if(func->_rs != RETURN && ct->_rs > NORETURN)
660                     func->_rs = ct->_rs;
661             }
662
663             // check for catch blocks after non-returning calls
664             if(ct && ct->_rs == NORETURN) {
665                 Address catchStart;
666                 Block * caller = ce->src();
667                 if(frame.codereg->findCatchBlock(caller->end(),catchStart)) {
668                     parsing_printf("[%s] found post-return catch block %lx\n",
669                         FILE__,catchStart);
670
671                     // make an edge
672                     Edge * catch_edge = link_tempsink(caller,CATCH);
673                 
674                     // push on worklist
675                     ParseWorkElem * catch_work = 
676                        work->bundle()->add(
677                         new ParseWorkElem(
678                             work->bundle(),
679                             catch_edge,
680                             catchStart,
681                             true, false)
682                        );
683                     frame.pushWork(catch_work);
684                 }
685             }
686     
687             continue;
688         } else if(work->order() == ParseWorkElem::call_fallthrough) {
689             // check associated call edge's return status
690             Edge * ce = bundle_call_edge(work->bundle());
691             if(!ce) {
692                 // odd; no call edge in this bundle
693                 parsing_printf("[%s] unexpected missing call edge at %lx\n",
694                     FILE__,work->edge()->src()->lastInsnAddr());
695             }
696             else {
697                 Address target = ce->trg()->start();
698                 Function * ct = _parse_data->findFunc(frame.codereg,target);
699                 bool is_plt = false;
700                 bool is_nonret = false;
701
702                 is_plt = HASHDEF(plt_entries,target);
703    
704                 // CodeSource-defined tests 
705                 is_nonret = obj().cs()->nonReturning(target);
706                 if(!is_nonret && is_plt) {
707                     is_nonret |= obj().cs()->nonReturning(plt_entries[target]);
708                 }
709                 // Parsed return status tests
710                 if(!is_nonret && !is_plt && ct) {
711                     is_nonret |= (ct->retstatus() == NORETURN);
712                 }
713
714                 if(is_nonret) {
715                     parsing_printf("[%s] no fallthrough for non-returning call "
716                                    "to %lx at %lx\n",FILE__,target,
717                                    work->edge()->src()->lastInsnAddr());
718
719                     // unlink tempsink fallthrough edge
720                     Edge * remove = work->edge();
721                     remove->src()->removeTarget(remove);
722                     factory().free_edge(remove);
723                     continue;
724                 }
725             }
726         } else if(work->order() == ParseWorkElem::seed_addr) {
727             cur = leadersToBlock[work->target()]; 
728         }
729                        
730         if(NULL == cur) { 
731             pair<Block*,Edge*> newedge =
732                 add_edge(frame,
733                          frame.func,
734                          work->edge()->src(),
735                          work->target(),
736                          work->edge()->type(),
737                          work->edge());
738             cur = newedge.first;
739         }
740
741         if(HASHDEF(visited,cur->start()))
742         {
743             parsing_printf("[%s] skipping locally parsed target at %lx\n",
744                 FILE__,work->target());
745             continue;
746         } 
747         visited[cur->start()] = true;
748         leadersToBlock[cur->start()] = cur;
749
750         if(!cur->_parsed)
751         {
752             parsing_printf("[%s] parsing block %lx\n",
753                 FILE__,cur->start());
754             cur->_parsed = true;
755             curAddr = cur->start();
756         } else {
757             parsing_printf("[%s] deferring parse of shared block %lx\n",
758                 FILE__,cur->start());
759             if (func->_rs < UNKNOWN) {
760                 func->_rs = UNKNOWN;
761             }
762             continue;
763         }
764
765         /*
766          * External consumers of parsing may have invoked finalizing
767          * methods from callback routines. Ensure that the caches
768          * are invalidated because a new block is being added to the
769          * function's view
770          */
771         func->_cache_valid = false;
772
773         // NB Using block's region() here because it may differ from the
774         //    function's if control flow has jumped to another region
775         unsigned size = 
776             cur->region()->offset() + cur->region()->length() - curAddr;
777 #if defined(cap_instruction_api)
778         const unsigned char* bufferBegin = 
779          (const unsigned char *)(func->isrc()->getPtrToInstruction(curAddr));
780         InstructionDecoder dec(bufferBegin,size,frame.codereg->getArch());
781
782         InstructionAdapter_t ah(dec, curAddr, func->obj(), 
783                                 cur->region(), func->isrc());
784 #else        
785         InstrucIter iter(curAddr, size, func->isrc());
786         InstructionAdapter_t ah(iter, 
787             func->obj(), cur->region(), func->isrc());
788 #endif        
789
790         using boost::tuples::tie;
791         tie(nextBlockAddr,nextBlock) = get_next_block(frame,_parse_data);
792
793         bool isNopBlock = ah.isNop();
794
795         while(true) 
796         {
797             curAddr = ah.getAddr();
798
799             /** Check for straight-line fallthrough **/
800             if(curAddr == nextBlockAddr) {
801                 parsing_printf("[%s] straight-line parse into block at %lx\n",
802                     FILE__,curAddr);
803
804                 ah.retreat();
805                 curAddr = ah.getAddr();
806
807                 end_block(cur,ah);
808                 pair<Block*,Edge*> newedge =
809                     add_edge(frame,frame.func,cur,
810                              nextBlockAddr,FALLTHROUGH,NULL);
811         
812                 if(!HASHDEF(visited,nextBlockAddr) && 
813                    !HASHDEF(leadersToBlock,nextBlockAddr)) { 
814                     parsing_printf("[%s:%d] pushing %lx onto worklist\n",
815                         FILE__,__LINE__,nextBlockAddr);
816
817                     frame.pushWork(
818                         new ParseWorkElem(
819                             NULL,
820                             newedge.second,
821                             nextBlockAddr,
822                             true,
823                             false)
824                         );
825                     leadersToBlock[nextBlockAddr] = nextBlock;
826                 }
827                 break;
828             } else if(curAddr > nextBlockAddr) {
829                 parsing_printf("[%s:%d] inconsistent instruction stream: "
830                                "%lx is within [%lx,%lx)\n",
831                     FILE__,__LINE__,curAddr,
832                     nextBlock->start(),nextBlock->end());
833
834                 // NB "cur" hasn't ended, so its range may
835                 // not look like it overlaps with nextBlock
836                 _pcb.overlapping_blocks(cur,nextBlock);
837
838                 tie(nextBlockAddr,nextBlock) = 
839                     get_next_block(frame,_parse_data);
840             }
841
842             // per-instruction callback notification 
843             ParseCallback::insn_details insn_det;
844             insn_det.insn = &ah;
845             _pcb.instruction_cb(func,curAddr,&insn_det);
846
847             if(isNopBlock && !ah.isNop()) {
848                 ah.retreat();
849
850                 end_block(cur,ah);
851                 pair<Block*,Edge*> newedge =
852                     add_edge(frame,frame.func,cur,curAddr,FALLTHROUGH,NULL);
853                 Block * targ = newedge.first;
854   
855                 parsing_printf("[%s:%d] nop-block ended at %lx\n",
856                     FILE__,__LINE__,curAddr); 
857                 if(targ && !HASHDEF(visited,targ->start())) {
858                     parsing_printf("[%s:%d] pushing %lx onto worklist\n",
859                         FILE__,__LINE__,targ->start());
860
861                     frame.pushWork(
862                         new ParseWorkElem(
863                             NULL,
864                             newedge.second,
865                             targ->start(),
866                             true,
867                             false)
868                         );
869                     leadersToBlock[targ->start()] = targ; 
870                 }
871                 break;
872             }
873             
874             /** Particular instruction handling (calls, branches, etc) **/
875             ++num_insns; 
876             if(ah.hasCFT()) {
877                 ProcessCFInsn(frame,cur,ah);
878                 break;
879             }
880             else if(func->_saves_fp && 
881                     func->_no_stack_frame &&  
882                     ah.isFrameSetupInsn()) // isframeSetup is expensive
883             {
884                 func->_no_stack_frame = false;
885             }
886             else if(ah.isLeave())
887             {
888                 func->_no_stack_frame = false;
889             }
890             else if( ah.isAbortOrInvalidInsn() )
891             {
892                 // 4. Invalid or `abort-causing' instructions
893                 end_block(cur,ah);
894                 break; 
895             }
896             else if( ah.isInterruptOrSyscall() )
897             {
898                 // 5. Raising instructions
899                 end_block(cur,ah);
900     
901                 pair<Block*,Edge*> newedge =
902                     add_edge(frame,frame.func,cur,ah.getNextAddr(),FALLTHROUGH,NULL);
903                 Block * targ = newedge.first;
904    
905                 if(targ && !HASHDEF(visited,targ->start()) &&
906                            !HASHDEF(leadersToBlock,targ->start())) {
907                     parsing_printf("[%s:%d] pushing %lx onto worklist\n",
908                         FILE__,__LINE__,targ->start());
909
910                     frame.pushWork(
911                         new ParseWorkElem(
912                             NULL,
913                             newedge.second,
914                             targ->start(),
915                             true,
916                             false)
917                         );
918                     leadersToBlock[targ->start()] = targ; 
919                 }
920                 break;
921             }
922             else {
923                 // default
924             }
925
926             /** Check for overruns of valid address space **/
927             if(!is_code(func,ah.getNextAddr()))
928             {
929                 parsing_printf("[%s] next insn %lx is invalid\n",
930                     FILE__,ah.getNextAddr());
931
932                 end_block(cur,ah);
933                 break;
934             }
935             else if(!cur->region()->contains(ah.getNextAddr()))
936             {
937                 parsing_printf("[%s] next address %lx is outside [%lx,%lx)\n",
938                     FILE__,ah.getNextAddr(),
939                     cur->region()->offset(),
940                     cur->region()->offset()+cur->region()->length());
941                 end_block(cur,ah);
942                 break;
943             }
944             ah.advance();
945         }
946     }
947
948     /** parsing complete **/
949     if(HASHDEF(plt_entries,frame.func->addr())) {
950        if(obj().cs()->nonReturning(frame.func->addr())) {
951           frame.func->_rs = NORETURN;
952        }
953        else
954           frame.func->_rs = UNKNOWN; 
955     }
956     else if(frame.func->_rs == UNSET) {
957         frame.func->_rs = NORETURN;
958     }
959     frame.set_status(ParseFrame::PARSED);
960 }
961
962 void
963 Parser::end_block(Block * b, InstructionAdapter_t & ah)
964 {
965     b->_lastInsn = ah.getAddr();
966     b->_end = ah.getNextAddr();
967
968     record_block(b);
969 }
970
971 void
972 Parser::record_block(Block *b)
973 {
974     parsing_printf("[%s:%d] recording block [%lx,%lx)\n",
975         FILE__,__LINE__,b->start(),b->end());
976     _parse_data->record_block(b->region(),b);
977 }
978
979
980 Block *
981 Parser::block_at(
982     Function * owner,
983     Address addr,
984     Block * & split)
985 {
986     set<Block *> overlap;
987     Block * exist = NULL;
988     Block * ret = NULL;
989     Block * inconsistent = NULL;
990     Address prev_insn = 0;
991
992     split = NULL;
993
994     CodeRegion *cr;
995     if(owner->region()->contains(addr))
996         cr = owner->region();
997     else
998         cr = _parse_data->reglookup(owner->region(),addr);
999
1000     if(!is_code(owner,addr)) {
1001         parsing_printf("[%s] block address %lx rejected by isCode()\n",
1002             FILE__,addr);
1003         return NULL;
1004     }
1005
1006     if(NULL == (exist = _parse_data->findBlock(cr,addr))) {
1007         _parse_data->findBlocks(cr,addr,overlap);
1008         if(overlap.size() > 1)
1009             parsing_printf("[%s] address %lx overlapped by %d blocks\n",
1010                 FILE__,addr,overlap.size());
1011
1012         /* Platform specific consistency test: 
1013            generally checking for whether the address is an
1014            instruction boundary in a block */
1015         for(set<Block *>::iterator sit=overlap.begin();sit!=overlap.end();++sit)
1016         {
1017             Block * b = *sit;
1018
1019             // consistent will fill in prev_insn with the address of the
1020             // instruction preceeding addr if addr is consistent
1021             if(b->consistent(addr,prev_insn)) {
1022                 exist = b;
1023                 break;   
1024             } else {
1025                 parsing_printf("[%s] %lx is inconsistent with [%lx,%lx)\n", 
1026                     FILE__,addr,b->start(),b->end());
1027                 if(inconsistent) {
1028                     parsing_printf("   multiple inconsistent blocks!\n");
1029                 }
1030                 inconsistent = b;
1031             }
1032         }
1033     }
1034
1035     if(exist) {
1036         if(exist->start() == addr) {
1037             parsing_printf("[%s] block %lx exists\n",
1038                 FILE__,addr);
1039             ret = exist;
1040         }
1041         else {
1042             parsing_printf("[%s] address %lx splits [%lx,%lx) (%p)\n",
1043                FILE__,addr,exist->start(),exist->end(),exist);
1044             split = exist;
1045             ret = split_block(owner,exist,addr,prev_insn);
1046         }
1047     } else {
1048         ret = factory().mkblock(owner,cr,addr);
1049         record_block(ret);
1050     }
1051
1052     if(unlikely(inconsistent)) {
1053        _pcb.overlapping_blocks(ret,inconsistent); 
1054     }
1055
1056     return ret;
1057 }
1058
1059 pair<Block *,Edge *>
1060 Parser::add_edge(
1061     ParseFrame & frame,
1062     Function * owner,
1063     Block * src,
1064     Address dst,
1065     EdgeTypeEnum et,
1066     Edge * exist)
1067 {
1068     Block * split = NULL;
1069     Block * ret = NULL;
1070     Edge * newedge = NULL;
1071     pair<Block *, Edge *> retpair((Block *) NULL, (Edge *) NULL);
1072
1073     if(!is_code(owner,dst)) {
1074         parsing_printf("[%s] target address %lx rejected by isCode()\n",dst);
1075         return retpair;
1076     }
1077
1078     ret = block_at(owner,dst,split);
1079     retpair.first = ret;
1080
1081     if(split == src) {
1082         // special case -- same block
1083         src = ret;
1084     }
1085
1086     if(split && HASHDEF(frame.visited,split->start())) {
1087         // prevent "delayed parsing" of extant block if 
1088         // this frame has already visited it
1089         frame.visited[ret->start()] = true;
1090         frame.leadersToBlock[ret->start()] = ret;
1091     }
1092
1093     if(NULL == exist) {
1094         newedge = link(src,ret,et,false);
1095         retpair.second = newedge;
1096     } else {
1097         relink(exist,src,ret);
1098         retpair.second = exist;
1099     }
1100
1101     return retpair;
1102 }
1103
1104 Block *
1105 Parser::split_block(
1106     Function * owner, 
1107     Block *b, 
1108     Address addr, 
1109     Address previnsn)
1110 {
1111     Block * ret;
1112     CodeRegion * cr;
1113     if(owner->region()->contains(b->start()))
1114         cr = owner->region();
1115     else
1116         cr = _parse_data->reglookup(owner->region(),b->start());
1117     region_data * rd = _parse_data->findRegion(cr);
1118
1119     // enable for extra-safe testing, but callers are responsbible
1120     // assert(b->consistent(addr);
1121
1122     ret = factory().mkblock(owner,cr,addr);
1123
1124     // move out edges
1125     vector<Edge *> & trgs = b->_targets;
1126     vector<Edge *>::iterator tit = trgs.begin(); 
1127     for(;tit!=trgs.end();++tit) {
1128         (*tit)->_source = ret;
1129         ret->_targets.push_back(*tit);
1130     }
1131     trgs.clear();
1132
1133     ret->_end = b->_end;
1134     ret->_lastInsn = b->_lastInsn;
1135     ret->_parsed = true;
1136     link(b,ret,FALLTHROUGH,false); 
1137
1138     record_block(ret);
1139
1140     // b's range has changed
1141     rd->blocksByRange.remove(b);
1142     b->_end = addr;
1143     b->_lastInsn = previnsn;
1144     rd->blocksByRange.insert(b); 
1145     // Any functions holding b that have already been finalized
1146     // need to have their caches invalidated so that they will
1147     // find out that they have this new 'ret' block
1148     std::set<Function*> prev_owners;
1149     rd->findFuncs(b->start(),prev_owners);
1150     for(std::set<Function*>::iterator oit = prev_owners.begin();
1151         oit != prev_owners.end(); ++oit)
1152     {
1153         Function * po = *oit;
1154         po->_cache_valid = false;
1155         parsing_printf("[%s:%d] split of [%lx,%lx) invalidates cache of "
1156                 "func at %lx\n",
1157         FILE__,__LINE__,b->start(),b->end(),po->addr());
1158     }
1159
1160     // if we're re-parsing in this function, inform user program of the split
1161     if (owner->_extents.size()) {
1162         _pcb.block_split(b,ret);
1163     }
1164
1165     return ret;
1166  }
1167
1168 pair<Function *,Edge*>
1169 Parser::bind_call(ParseFrame & frame, Address target, Block * cur, Edge * exist)
1170 {
1171     Function * tfunc = NULL;
1172     Block * tblock = NULL;
1173     FuncSource how = RT;
1174     if(frame.func->_src == GAP || frame.func->_src == GAPRT)
1175         how = GAPRT;
1176
1177     // look it up
1178     tfunc = _parse_data->get_func(frame.codereg,target,how);
1179     if(!tfunc) {
1180         parsing_printf("[%s:%d] can't bind call to %lx\n",
1181             FILE__,__LINE__,target);
1182         return pair<Function*,Edge*>((Function *) NULL,exist);
1183     }
1184
1185     // add an edge
1186     pair<Block*,Edge*> ret = add_edge(frame,tfunc,cur,target,CALL,exist);
1187     tblock = ret.first;
1188     if(!tblock) {
1189         parsing_printf("[%s:%d] can't bind call to %lx\n",
1190             FILE__,__LINE__,target);
1191         return pair<Function*,Edge*>((Function *) NULL,exist);
1192     }
1193
1194     return pair<Function*,Edge*>(tfunc,ret.second);
1195 }
1196
1197 Function *
1198 Parser::findFuncByEntry(CodeRegion *r, Address entry)
1199 {
1200     if(_parse_state < PARTIAL) {
1201         parsing_printf("[%s:%d] Parser::findFuncByEntry([%lx,%lx),%lx) "
1202                        "forced parsing\n",
1203             FILE__,__LINE__,r->low(),r->high(),entry);
1204         parse();
1205     }
1206     return _parse_data->findFunc(r,entry);
1207 }
1208
1209 int 
1210 Parser::findFuncs(CodeRegion *r, Address addr, set<Function *> & funcs)
1211 {
1212     if(_parse_state < COMPLETE) {
1213         parsing_printf("[%s:%d] Parser::findFuncs([%lx,%lx),%lx,...) "
1214                        "forced parsing\n",
1215             FILE__,__LINE__,r->low(),r->high(),addr);
1216         parse();
1217     }
1218     if(_parse_state < FINALIZED) {
1219         parsing_printf("[%s:%d] Parser::findFuncs([%lx,%lx),%lx,...) "
1220                        "forced finalization\n",
1221             FILE__,__LINE__,r->low(),r->high(),addr);
1222         finalize();
1223     }
1224     return _parse_data->findFuncs(r,addr,funcs);
1225 }
1226
1227 Block *
1228 Parser::findBlockByEntry(CodeRegion *r, Address entry)
1229 {
1230     if(_parse_state < PARTIAL) {
1231         parsing_printf("[%s:%d] Parser::findBlockByEntry([%lx,%lx),%lx) "
1232                        "forced parsing\n",
1233             FILE__,__LINE__,r->low(),r->high(),entry);
1234         parse();
1235     }
1236     return _parse_data->findBlock(r,entry);
1237 }
1238
1239 int
1240 Parser::findBlocks(CodeRegion *r, Address addr, set<Block *> & blocks)
1241 {
1242     if(_parse_state < COMPLETE) {
1243         parsing_printf("[%s:%d] Parser::findBlocks([%lx,%lx),%lx,...) "
1244                        "forced parsing\n",
1245             FILE__,__LINE__,r->low(),r->high(),addr);
1246         parse();
1247     }
1248     return _parse_data->findBlocks(r,addr,blocks); 
1249 }
1250
1251 Edge*
1252 Parser::link(Block *src, Block *dst, EdgeTypeEnum et, bool sink)
1253 {
1254     Edge * e = factory().mkedge(src,dst,et);
1255     e->_type._sink = sink;
1256     src->_targets.push_back(e);
1257     dst->_sources.push_back(e);
1258     return e;
1259 }
1260
1261 /* 
1262  * During parsing, all edges are temporarily linked to the _tempsink
1263  * block. Adding and then removing edges to and from this block is
1264  * wasteful, especially given that removal is an O(N) operation with
1265  * vector storage. This call thus does not record edges into the sink.
1266  * These edges are guaranteed to have been relinked when parsing is
1267  * in state COMPLETE.
1268  *
1269  * NB This introduces an inconsistency into the CFG invariant that all
1270  *    targets of an edge have that edge in their source list if the
1271  *    data structures are queried during parsing. Extenders of 
1272  *    parsing callbacks are the only ones who can see this state;
1273  *    they have been warned and should know better.
1274  */
1275 Edge*
1276 Parser::link_tempsink(Block *src, EdgeTypeEnum et)
1277 {
1278     Edge * e = factory().mkedge(src,_sink,et);
1279     e->_type._sink = true;
1280     src->_targets.push_back(e);
1281     return e;
1282 }
1283
1284 void
1285 Parser::relink(Edge * e, Block *src, Block *dst)
1286 {
1287     if(src != e->src()) {
1288         e->src()->removeTarget(e);
1289         e->_source = src;
1290         src->addTarget(e);
1291     }
1292     if(dst != e->trg()) { 
1293         if(e->trg() != _sink)
1294             e->trg()->removeSource(e);
1295         e->_target = dst;
1296         dst->addSource(e);
1297     }
1298
1299     e->_type._sink = (dst == _sink);
1300 }
1301
1302 ParseFrame::Status
1303 Parser::frame_status(CodeRegion * cr, Address addr)
1304 {
1305     // XXX parsing frames may have been cleaned up, but status
1306     //     is always cached
1307     return _parse_data->frameStatus(cr,addr);
1308 }
1309
1310 void
1311 Parser::remove_func(Function *func)
1312 {
1313     if (sorted_funcs.end() != sorted_funcs.find(func)) {
1314         sorted_funcs.erase(func);
1315     }
1316     if (HINT == func->src()) {
1317         for (unsigned fidx=0; fidx < hint_funcs.size(); fidx++) {
1318             if (hint_funcs[fidx] == func) {
1319                 hint_funcs[fidx] = hint_funcs[hint_funcs.size()-1];
1320                 hint_funcs.pop_back();
1321                 break;
1322             }
1323         }
1324     }
1325     else {
1326         for (unsigned fidx=0; fidx < discover_funcs.size(); fidx++) {
1327             if (discover_funcs[fidx] == func) {
1328                 discover_funcs[fidx] = discover_funcs[discover_funcs.size()-1];
1329                 discover_funcs.pop_back();
1330                 break;
1331             }
1332         }
1333     }
1334     
1335     _parse_data->remove_func(func);
1336 }
1337
1338 void
1339 Parser::remove_block(Dyninst::ParseAPI::Block *block)
1340 {
1341     _parse_data->remove_block(block);
1342 }