Merge branch 'master' into NewInstpoint
[dyninst.git] / parseAPI / src / Parser.C
1 /*
2  * Copyright (c) 1996-2009 Barton P. Miller
3  * 
4  * We provide the Paradyn Parallel Performance Tools (below
5  * described as "Paradyn") on an AS IS basis, and do not warrant its
6  * validity or performance.  We reserve the right to update, modify,
7  * or discontinue this software at any time.  We shall have no
8  * obligation to supply such updates or modifications or any other
9  * form of support to you.
10  * 
11  * By your use of Paradyn, you understand and agree that we (or any
12  * other person or entity with proprietary rights in Paradyn) are
13  * under no obligation to provide either maintenance services,
14  * update services, notices of latent defects, or correction of
15  * defects for Paradyn.
16  * 
17  * This library is free software; you can redistribute it and/or
18  * modify it under the terms of the GNU Lesser General Public
19  * License as published by the Free Software Foundation; either
20  * version 2.1 of the License, or (at your option) any later version.
21  * 
22  * This library is distributed in the hope that it will be useful,
23  * but WITHOUT ANY WARRANTY; without even the implied warranty of
24  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
25  * Lesser General Public License for more details.
26  * 
27  * You should have received a copy of the GNU Lesser General Public
28  * License along with this library; if not, write to the Free Software
29  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
30  */
31
32 #include <vector>
33 #include <limits>
34
35 #include "dyntypes.h"
36
37 #include "CodeObject.h"
38 #include "CFGFactory.h"
39 #include "ParseCallback.h"
40 #include "Parser.h"
41 #include "CFG.h"
42 #include "util.h"
43 #include "debug_parse.h"
44
45 #if defined(os_solaris)
46 // XXX old GCC versions on solaris cause problems with
47 //     boost at Wisconsin.
48 #define BOOST_NO_TEMPLATE_PARTIAL_SPECIALIZATION
49 #include <boost/tuple/tuple.hpp>
50 #undef BOOST_NO_TEMPLATE_PARTIAL_SPECIALIZATION
51 #else
52 #include <boost/tuple/tuple.hpp>
53 #endif
54
55 using namespace std;
56 using namespace Dyninst;
57 using namespace Dyninst::ParseAPI;
58 #if defined(cap_instruction_api)
59 using namespace Dyninst::InstructionAPI;
60 #endif
61
62 typedef std::pair< Address, EdgeTypeEnum > edge_pair_t;
63 typedef vector< edge_pair_t > Edges_t;
64
65
66 namespace {
67     struct less_cr {
68      bool operator()(CodeRegion * x, CodeRegion * y) 
69      { 
70          return x->offset() < y->offset();
71      }
72     };
73 }
74
75 Parser::Parser(CodeObject & obj, CFGFactory & fact, ParseCallback & pcb) :
76     _obj(obj),
77     _cfgfact(fact),
78     _pcb(pcb),
79     _parse_data(NULL),
80     _sink(NULL),
81     _parse_state(UNPARSED),
82     _in_parse(false),
83     _in_finalize(false)
84 {
85     // cache plt entries for fast lookup
86     const map<Address, string> & lm = obj.cs()->linkage();
87     map<Address, string>::const_iterator lit = lm.begin();
88     for( ; lit != lm.end(); ++lit) {
89         parsing_printf("Cached PLT entry %s (%lx)\n",lit->second.c_str(),lit->first);
90         plt_entries[lit->first] = lit->second;
91     }
92
93     if(obj.cs()->regions().empty()) {
94         parsing_printf("[%s:%d] CodeSource provides no CodeRegions"
95                        " -- unparesable\n",
96             FILE__,__LINE__);
97         _parse_state = UNPARSEABLE;
98         return;
99     }
100
101     // check whether regions overlap
102     vector<CodeRegion *> const& regs = obj.cs()->regions();
103     vector<CodeRegion *> copy(regs.begin(),regs.end());
104     sort(copy.begin(),copy.end(),less_cr());
105
106     // allocate a sink block -- region is arbitrary
107     _sink = _cfgfact.mksink(&_obj,copy[0]);
108
109     bool overlap = false;
110     CodeRegion * prev = copy[0], *cur = NULL;
111     for(unsigned i=1;i<copy.size();++i) {
112         cur = copy[i];
113         if(cur->offset() < prev->offset() + prev->length()) {
114             parsing_printf("Overlapping code regions [%lx,%lx) and [%lx,%lx)\n",
115                 prev->offset(),prev->offset()+prev->length(),
116                 cur->offset(),cur->offset()+cur->length());
117             overlap = true;
118             break;
119         }
120     }
121
122     if(overlap)
123         _parse_data = new OverlappingParseData(this,copy);
124     else
125         _parse_data = new StandardParseData(this);
126 }
127
128 ParseFrame::~ParseFrame()
129 {
130     // no locally allocated storage
131 }
132
133 Parser::~Parser()
134 {
135     if(_parse_data)
136         delete _parse_data;
137
138     vector<ParseFrame *>::iterator fit = frames.begin();
139     for( ; fit != frames.end(); ++fit) 
140         delete *fit;
141     frames.clear();
142 }
143
144 void
145 Parser::add_hint(Function * f)
146 {
147     if(!_parse_data->findFunc(f->region(),f->addr()))
148         record_func(f);
149 }
150
151 void
152 Parser::parse()
153 {
154     parsing_printf("[%s:%d] parse() called on Parser with state %d\n",
155         FILE__,__LINE__,_parse_state);
156
157     if(_parse_state == UNPARSEABLE)
158         return;
159
160     assert(!_in_parse);
161     _in_parse = true;
162
163     parse_vanilla();
164
165     // anything else by default...?
166
167     if(_parse_state < COMPLETE)
168         _parse_state = COMPLETE;
169     
170     _in_parse = false;
171 }
172
173 void
174 Parser::parse_at(
175     CodeRegion * region,
176     Address target,
177     bool recursive,
178     FuncSource src)
179 {
180     Function *f;
181     ParseFrame *pf;
182     vector<ParseFrame *> work;
183
184     parsing_printf("[%s:%d] entered parse_at([%lx,%lx),%lx)\n",
185         FILE__,__LINE__,region->low(),region->high(),target);
186
187     if(!region->contains(target)) {
188         parsing_printf("\tbad address, bailing\n");
189         return;
190     }
191
192     if(_parse_state < PARTIAL)
193         _parse_state = PARTIAL;
194
195     f = _parse_data->get_func(region,target,src);
196     if(!f) {
197         parsing_printf("   could not create function at %lx\n",target);
198         return;
199     }
200
201     ParseFrame::Status exist = _parse_data->frameStatus(region,target);
202     if(exist != ParseFrame::BAD_LOOKUP && exist != ParseFrame::UNPARSED) {
203         parsing_printf("   function at %lx already parsed, status %d\n",
204             target, exist);
205         return;
206     }
207
208     if(!(pf = _parse_data->findFrame(region,target))) {
209         pf = new ParseFrame(f,_parse_data);
210         init_frame(*pf);
211         frames.push_back(pf);
212         _parse_data->record_frame(pf);
213     } 
214
215     work.push_back(pf);
216     parse_frames(work,recursive);
217
218     // downgrade state if necessary
219     if(_parse_state > COMPLETE)
220         _parse_state = COMPLETE;
221
222 }
223
224 void
225 Parser::parse_at(Address target, bool recursive, FuncSource src)
226 {
227     CodeRegion * region = NULL;
228
229     parsing_printf("[%s:%d] entered parse_at(%lx)\n",FILE__,__LINE__,target);
230
231     if(_parse_state == UNPARSEABLE)
232         return;
233
234     StandardParseData * spd = dynamic_cast<StandardParseData *>(_parse_data);
235     if(!spd) {
236         parsing_printf("   parse_at is invalid on overlapping regions\n");
237         return;
238     }
239
240     region = spd->reglookup(region,target); // input region ignored for SPD
241     if(!region) {
242         parsing_printf("   failed region lookup at %lx\n",target);
243         return;
244     }
245
246     parse_at(region,target,recursive,src);
247 }
248
249 void
250 Parser::parse_vanilla()
251 {
252     ParseFrame *pf;
253     vector<ParseFrame *> work;
254     vector<Function *>::iterator fit;
255
256     parsing_printf("[%s:%d] entered parse_vanilla()\n",FILE__,__LINE__);
257     parsing_printf("\t%d function hints\n",hint_funcs.size());
258
259     if(_parse_state < PARTIAL)
260         _parse_state = PARTIAL;
261     else
262         parsing_printf("\tparse state is %d, some parsing already done\n",
263             _parse_state);
264
265     /* Initialize parse frames from hints */
266     for(fit=hint_funcs.begin();fit!=hint_funcs.end();++fit) {
267         Function * hf = *fit;
268         ParseFrame::Status test = frame_status(hf->region(),hf->addr());
269         if(test != ParseFrame::BAD_LOOKUP &&
270            test != ParseFrame::UNPARSED)
271         {
272             parsing_printf("\tskipping repeat parse of %lx [%s]\n",
273                 hf->addr(),hf->name().c_str());
274             continue;
275         }
276
277         pf = new ParseFrame(hf,_parse_data);
278         init_frame(*pf);
279         frames.push_back(pf);
280         work.push_back(pf);
281         _parse_data->record_frame(pf);
282     }
283
284     parse_frames(work,true);
285 }
286
287 void
288 Parser::parse_edges( vector< ParseWorkElem * > & work_elems )
289 {
290     if(_parse_state == UNPARSEABLE)
291         return;
292
293     // build up set of needed parse frames and load them with work elements
294     set<ParseFrame*> frameset; // for dup checking
295     vector<ParseFrame*> frames;
296
297     for (unsigned idx=0; idx < work_elems.size(); idx++) {
298
299         ParseWorkElem *elem = work_elems[idx];
300         Block *src = elem->edge()->src();
301
302         if (elem->order() == ParseWorkElem::call_fallthrough)
303         {
304             Edge *callEdge = NULL;
305             Block::edgelist trgs = src->targets();
306             for (Block::edgelist::iterator eit = trgs.begin(); 
307                  eit != trgs.end(); 
308                  eit++) 
309             {
310                 if ((*eit)->type() == CALL) {
311                     callEdge = *eit;
312                     break;
313                 }
314             }
315             // create a call work elem so that the bundle is complete
316             // and set the target function's return status and 
317             // tamper to RETURN and TAMPER_NONE, respectively
318             //assert(callEdge);
319             // KEVIN TODO
320             // In cases where we had a direct call to garbage that was later unpacked,
321             // we aren't putting in the call edge when the call is first executed. I'm
322             // not sure why not, so I'm attempting to continue past here without
323             // fixing up the called function...
324             // Also, in the case I saw, the callee was _not_ returning. Not directly. It 
325             // led into a big case with a longjmp() equivalent. 
326
327             if (callEdge) 
328             {
329                 bool isResolvable = false;
330                 Address callTarget = 0;
331                 if ( ! callEdge->sinkEdge() ) 
332                 {
333                     isResolvable = true;
334                     callTarget = callEdge->trg()->start();
335                     Function *callee = findFuncByEntry(
336                         callEdge->trg()->region(), callTarget);
337                     assert(callee);
338                     callee->set_retstatus(RETURN);
339                     callee->_tamper = TAMPER_NONE;
340                 }
341                 elem->bundle()->add(new ParseWorkElem
342                     ( elem->bundle(), 
343                     callEdge,
344                     callTarget,
345                     isResolvable,
346                     false ));
347             }
348         }
349         ParseFrame *frame = _parse_data->findFrame
350             ( src->region(), 
351               src->lastInsnAddr() );
352         bool isNewFrame = false;
353         if (!frame) {
354             vector<Function*> funcs;
355             src->getFuncs(funcs);
356             frame = new ParseFrame(*funcs.begin(),_parse_data);
357             for (unsigned fix=1; fix < funcs.size(); fix++) {
358                 // if the block is shared, all of its funcs need
359                 // to add the new edge
360                 funcs[fix]->_cache_valid = false;
361             }
362             isNewFrame = true;
363         }
364
365         // push before frame init so no seed is added
366         if (elem->bundle()) {
367             frame->work_bundles.push_back(elem->bundle());
368         }
369         frame->pushWork(elem);
370         if (isNewFrame) {
371             init_frame(*frame);
372         }
373
374         if (frameset.end() == frameset.find(frame)) {
375             frameset.insert(frame);
376             frames.push_back(frame);
377         }
378     }
379
380     // now parse
381     if(_parse_state < PARTIAL)
382         _parse_state = PARTIAL;
383     _in_parse = true;
384
385     parse_frames( frames, true );
386
387     if(_parse_state > COMPLETE)
388         _parse_state = COMPLETE;
389     _in_parse = false;
390
391     finalize();
392
393 }
394
395 void
396 Parser::parse_frames(vector<ParseFrame *> & work, bool recursive)
397 {
398     ParseFrame * pf;
399
400     /* Recursive traversal parsing */ 
401     while(!work.empty()) {
402         pf = work.back();
403         work.pop_back();
404
405         if(pf->status() == ParseFrame::PARSED)
406             continue;
407
408         parse_frame(*pf,recursive);
409         switch(pf->status()) {
410             case ParseFrame::CALL_BLOCKED: {
411                 parsing_printf("[%s] frame %lx blocked at %lx\n",
412                     FILE__,pf->func->addr(),pf->curAddr);
413
414                 assert(pf->call_target);        
415
416                 parsing_printf("    call target %lx\n",pf->call_target->addr());
417                 work.push_back(pf);
418     
419                 CodeRegion * cr = pf->call_target->region();
420                 Address targ = pf->call_target->addr();
421
422                 ParseFrame * tf = _parse_data->findFrame(cr,targ);
423                 if(!tf)
424                 {
425                     // sanity
426                     if(_parse_data->frameStatus(cr,targ) == ParseFrame::PARSED)
427                         assert(0);
428
429                     tf = new ParseFrame(pf->call_target,_parse_data);
430                     init_frame(*tf);
431                     frames.push_back(tf);
432                     _parse_data->record_frame(tf);
433                 }
434                 if(likely(recursive))
435                     work.push_back(tf);
436                 else {
437                     assert(0);
438                     // XXX should never get here
439                     //parsing_printf("    recursive parsing disabled\n");
440                 }
441                 break;
442             }
443             case ParseFrame::PARSED:
444                 parsing_printf("[%s] frame %lx complete, return status: %d\n",
445                     FILE__,pf->func->addr(),pf->func->_rs);
446                 if (unlikely( _obj.defensiveMode() && 
447                               TAMPER_NONE != pf->func->tampersStack() &&
448                               TAMPER_NONZERO != pf->func->tampersStack() ))
449                 {   // add a fallthrough / tamper-target edge if there is one
450                     for (unsigned widx = 0; 
451                          pf->func->tampersStack() == TAMPER_REL && 
452                          widx < work.size(); 
453                          widx++) 
454                     {
455                         if (work[widx]->status() == ParseFrame::CALL_BLOCKED &&
456                             pf->func == work[widx]->call_target) 
457                         {
458                             for (unsigned bidx=0 ; 
459                                  bidx < work[widx]->work_bundles.size(); 
460                                  bidx++) 
461                             {
462                                 const vector<ParseWorkElem*> &elems = 
463                                     work[widx]->work_bundles[bidx]->elems();
464                                 bool rightBundle = false;
465                                 ParseWorkElem * ftEdge = NULL;
466                                 for (unsigned eix=0; eix < elems.size(); eix++)
467                                 {
468                                     if (NULL == elems[eix]->edge()) 
469                                     {
470                                         continue;
471                                     }
472                                     if (elems[eix]->edge()->type() == CALL &&
473                                         elems[eix]->target()==pf->func->addr())
474                                     {
475                                         rightBundle = true;
476                                     }
477                                     else if (elems[eix]->edge()->type() == 
478                                              CALL_FT)
479                                     {
480                                         ftEdge = elems[eix];
481                                     }
482                                 }
483                                 if (rightBundle && ftEdge) 
484                                 {
485                                     ftEdge->setTarget(ftEdge->target() + 
486                                                       pf->func->_tamper_addr);
487                                 }
488                             }
489                         }
490                     }
491                     if (pf->func->tampersStack() == TAMPER_ABS) 
492                     {
493                         ParseFrame * tf = getTamperAbsFrame(pf->func);
494                         if (tf) 
495                         {
496                             if( ! _parse_data->findFrame(tf->func->region(),
497                                                          tf->func->addr()) ) 
498                             {
499                                 init_frame(*tf);
500                                 frames.push_back(tf);
501                                 _parse_data->record_frame(tf);
502                                 Address loadBase =0;
503                                 _pcb.loadAddr(pf->func->_tamper_addr, loadBase);
504                                 _pcb.updateCodeBytes(pf->func->_tamper_addr - loadBase);
505                             }
506                             work.push_back(tf);
507                         }
508                     }
509                 }
510                 pf->cleanup();
511                 break;
512             case ParseFrame::FRAME_ERROR:
513                 parsing_printf("[%s] frame %lx error at %lx\n",
514                     FILE__,pf->func->addr(),pf->curAddr);
515                 break;
516             default:
517                 assert(0 && "invalid parse frame status");
518         }
519     }
520
521     for(unsigned i=0;i<frames.size();++i) {
522         _parse_data->remove_frame(frames[i]);
523         delete frames[i];
524     }
525     frames.clear();
526 }
527
528 /* Finalizing all functions for consumption:
529   
530    - Finish delayed parsing
531    - Prepare and record FuncExtents for range-based lookup
532 */
533
534 // Could set cache_valid depending on whether the function is currently
535 // being parsed somewhere. 
536
537 void
538 Parser::finalize(Function *f)
539 {
540     if(f->_cache_valid) {
541         return;
542     }
543
544     if(!f->_parsed) {
545         parsing_printf("[%s:%d] Parser::finalize(f[%lx]) "
546                        "forced parsing\n",
547             FILE__,__LINE__,f->addr());
548         parse();
549     }
550
551         bool cache_value = true;
552         if(frame_status(f->region(), f->addr()) < ParseFrame::PARSED) {
553                 // XXX prevent caching of blocks, extents for functions that
554                 // are actively being parsed. This prevents callbacks and other
555                 // functions called from within, e.g. parse_frame from setting
556                 // the caching flag and preventing later updates to the blocks()
557                 // vector during finalization.
558                 cache_value = false;
559         }
560
561     parsing_printf("[%s] finalizing %s (%lx)\n",
562         FILE__,f->name().c_str(),f->addr());
563
564     region_data * rd = _parse_data->findRegion(f->region());
565     assert(rd);
566
567     // finish delayed parsing and sorting
568     vector<Block*> const& blocks = f->blocks_int();
569
570     // is this the first time we've parsed this function?
571     bool firstParse = true;
572     if (unlikely( f->_extents.size() )) {
573         firstParse = false;
574     }
575     
576     // extents
577     _parse_data->remove_extents(f->_extents);
578     f->_extents.clear();
579
580     if(blocks.empty()) {
581         f->_cache_valid = cache_value; // see above
582         return;
583     }
584     
585     vector<Block*>::const_iterator bit = blocks.begin();
586     FuncExtent * ext = NULL;
587     Address ext_s = (*bit)->start();
588     Address ext_e = ext_s;
589
590     for( ; bit != blocks.end(); ++bit) {
591         Block * b = *bit;
592         if(b->start() > ext_e) {
593             ext = new FuncExtent(f,ext_s,ext_e);
594             parsing_printf("%lx extent [%lx,%lx)\n",f->addr(),ext_s,ext_e);
595             f->_extents.push_back(ext);
596             rd->funcsByRange.insert(ext);
597             ext_s = b->start();
598         }
599         ext_e = b->end();
600     }
601     ext = new FuncExtent(f,ext_s,ext_e);
602     parsing_printf("%lx extent [%lx,%lx)\n",f->addr(),ext_s,ext_e);
603     rd->funcsByRange.insert(ext);
604     f->_extents.push_back(ext);
605
606     f->_cache_valid = cache_value; // see comment at function entry
607
608     if (unlikely( f->obj()->defensiveMode())) {
609         // add fallthrough edges for calls assumed not to be returning
610         // whose fallthrough blocks we parsed anyway (this happens if 
611         // the FT block is also a branch target)
612         Function::edgelist & edges = f->_call_edge_list;
613         for (Function::edgelist::iterator eit = edges.begin();
614              eit != edges.end(); 
615              eit++)
616         {
617             if (2 > (*eit)->src()->targets().size()) {
618                 Block *ft = _parse_data->findBlock((*eit)->src()->region(),
619                                                    (*eit)->src()->end());
620                 if (ft) {
621                     link((*eit)->src(),ft,CALL_FT,false);
622                 }
623             }
624         }
625         // check for stack tampering
626         if ( firstParse ) {
627             f->tampersStack();
628             _pcb.newfunction_retstatus( f );
629         }
630     }
631 }
632
633 void
634 Parser::finalize()
635 {
636     assert(!_in_finalize);
637     _in_finalize = true;
638
639     if(_parse_state < FINALIZED) {
640         finalize_funcs(hint_funcs);
641         finalize_funcs(discover_funcs);
642         _parse_state = FINALIZED;
643     }
644
645     _in_finalize = false;
646 }
647
648 void
649 Parser::finalize_funcs(vector<Function *> &funcs)
650 {
651     vector<Function *>::iterator fit = funcs.begin();
652     for( ; fit != funcs.end(); ++fit) {
653         finalize(*fit);
654     } 
655 }
656
657 void
658 Parser::record_func(Function *f) {
659     if(!f) return;
660
661     if(f->src() == HINT)
662         hint_funcs.push_back(f);
663     else
664         discover_funcs.push_back(f);
665
666     sorted_funcs.insert(f);
667
668     _parse_data->record_func(f);
669 }
670
671
672 void
673 Parser::init_frame(ParseFrame & frame)
674 {
675    Block * b = NULL;
676     Block * split = NULL;
677
678     if ( ! frame.func->_entry ) 
679     {
680         // Find or create a block
681         b = block_at(frame.func, frame.func->addr(),split);
682         if(b) {
683             frame.leadersToBlock[frame.func->addr()] = b;
684             frame.func->_entry = b;
685             frame.seed = new ParseWorkElem(NULL,NULL,frame.func->addr(),true,false);
686             frame.pushWork(frame.seed);
687         } else {
688             parsing_printf("[%s] failed to initialize parsing frame\n",
689                 FILE__);
690             return;
691         }
692         if (split) {
693             _pcb.block_split(split,b);
694         }
695     }
696
697     // FIXME these operations should move into the actual parsing
698     Address ia_start = frame.func->addr();
699     unsigned size = 
700      frame.codereg->offset() + frame.codereg->length() - ia_start;
701 #if defined(cap_instruction_api)
702     const unsigned char* bufferBegin =
703      (const unsigned char *)(frame.func->isrc()->getPtrToInstruction(ia_start));
704     InstructionDecoder dec(bufferBegin,size,frame.codereg->getArch());
705     InstructionAdapter_t ah(dec, ia_start, frame.func->obj(),
706         frame.codereg, frame.func->isrc(), b);
707 #else
708     InstrucIter iter(ia_start, size, frame.func->isrc());
709     InstructionAdapter_t ah(iter, 
710         frame.func->obj(),
711         frame.codereg,
712         frame.func->isrc(), b);
713 #endif
714     if(ah.isStackFramePreamble())
715         frame.func->_no_stack_frame = false;
716     frame.func->_saves_fp = ah.savesFP();
717 }
718
719 void ParseFrame::cleanup()
720 {
721     for(unsigned i=0;i<work_bundles.size();++i)
722         delete work_bundles[i];
723 }
724
725 namespace {
726     inline Edge * bundle_call_edge(ParseWorkBundle * b)
727     {
728         if(!b) return NULL;
729
730         vector<ParseWorkElem*> const& elems = b->elems();
731         vector<ParseWorkElem*>::const_iterator it = elems.begin();
732         for( ; it != elems.end(); ++it) {
733             if((*it)->edge()->type() == CALL)
734                 return (*it)->edge();
735         }
736         return NULL;
737     }
738
739     /* 
740      * Look up the next block for detection of straight-line
741      * fallthrough edges into existing blocks.
742      */
743     inline std::pair<Address, Block*> get_next_block(
744         Address addr,
745         CodeRegion * codereg, 
746         ParseData * _parse_data)
747     {
748         Block * nextBlock = NULL;
749         Address nextBlockAddr;
750
751         nextBlockAddr = numeric_limits<Address>::max();
752         region_data * rd = _parse_data->findRegion(codereg);
753
754         if((nextBlock = rd->blocksByRange.successor(addr)) &&
755            nextBlock->start() > addr)
756         {
757             nextBlockAddr = nextBlock->start();   
758         }
759
760         return std::pair<Address,Block*>(nextBlockAddr,nextBlock);
761     }
762 }
763
764 void
765 Parser::parse_frame(ParseFrame & frame, bool recursive) {
766     /** Persistent intermediate state **/
767     dyn_detail::boost::shared_ptr<InstructionAdapter_t> ahPtr;
768     ParseFrame::worklist_t & worklist = frame.worklist;
769     dyn_hash_map<Address, Block *> & leadersToBlock = frame.leadersToBlock;
770     Address & curAddr = frame.curAddr;
771     Function * func = frame.func;
772     dyn_hash_map<Address, bool> & visited = frame.visited;
773     unsigned & num_insns = frame.num_insns;
774     func->_cache_valid = false;
775
776     /** Non-persistent intermediate state **/
777     Address nextBlockAddr;
778     Block * nextBlock;
779
780     if(frame.status() == ParseFrame::UNPARSED) {
781         parsing_printf("[%s] ==== starting to parse frame %lx ====\n",
782             FILE__,frame.func->addr());
783         // prevents recursion of parsing
784         frame.func->_parsed = true;
785     } else {
786         parsing_printf("[%s] ==== resuming parse of frame %lx ====\n",
787             FILE__,frame.func->addr());
788     }
789
790
791     frame.set_status(ParseFrame::PROGRESS);
792
793     while(!worklist.empty()) {
794         Block * cur = NULL;
795         ParseWorkElem * work = frame.popWork();
796
797         if(work->order() == ParseWorkElem::call) {
798             Function * ct = NULL;
799             Edge * ce = NULL;
800
801             if(!work->callproc()) {
802                 parsing_printf("[%s] binding call %lx->%lx\n",
803                     FILE__,work->edge()->src()->lastInsnAddr(),work->target());
804             
805                 pair<Function*,Edge*> ctp =
806                     bind_call(frame,
807                               work->target(),
808                               work->edge()->src(),
809                               work->edge());
810                 ct = ctp.first;
811                 ce = ctp.second;
812
813                 work->mark_call();
814             } else {
815                 ct = _parse_data->findFunc(frame.codereg,work->target());
816                 ce = work->edge();
817             }
818
819             if(recursive && ct &&
820                (frame_status(ct->region(),ct->addr())==ParseFrame::UNPARSED || 
821                 frame_status(ct->region(),ct->addr())==ParseFrame::BAD_LOOKUP))
822             {
823                 // suspend this frame and parse the next
824                 parsing_printf("    [suspend frame %lx]\n", func->addr()); 
825                 frame.call_target = ct;
826                 frame.set_status(ParseFrame::CALL_BLOCKED);
827                 // need to re-visit this edge
828                 frame.pushWork(work);
829                 return;
830             }
831             else if(ct && work->tailcall()) {
832                 // XXX The target has been or is currently being parsed (else
833                 //     the previous conditional would have been taken),
834                 //     so if its return status is unset then this
835                 //     function has to take UNKNOWN
836                 if(func->_rs != RETURN) {
837                     if(ct->_rs > NORETURN)
838                         func->_rs = ct->_rs;
839                     else if(ct->_rs == UNSET)
840                         func->_rs = UNKNOWN;
841                 }
842             }
843
844             // check for catch blocks after non-returning calls
845             if(ct && ct->_rs == NORETURN) {
846                 Address catchStart;
847                 Block * caller = ce->src();
848                 if(frame.codereg->findCatchBlock(caller->end(),catchStart)) {
849                     parsing_printf("[%s] found post-return catch block %lx\n",
850                         FILE__,catchStart);
851
852                     // make an edge
853                     Edge * catch_edge = link_tempsink(caller,CATCH);
854                 
855                     // push on worklist
856                     ParseWorkElem * catch_work = 
857                        work->bundle()->add(
858                         new ParseWorkElem(
859                             work->bundle(),
860                             catch_edge,
861                             catchStart,
862                             true, false)
863                        );
864                     frame.pushWork(catch_work);
865                 }
866             }
867     
868             continue;
869         } else if(work->order() == ParseWorkElem::call_fallthrough) {
870
871                         // check associated call edge's return status
872             Edge * ce = bundle_call_edge(work->bundle());
873             if(!ce) {
874                 // odd; no call edge in this bundle
875                 parsing_printf("[%s] unexpected missing call edge at %lx\n",
876                     FILE__,work->edge()->src()->lastInsnAddr());
877             }
878             else {
879                 Address target = ce->trg()->start();
880                 Function * ct = _parse_data->findFunc(frame.codereg,target);
881                 bool is_plt = false;
882                 bool is_nonret = false;
883
884                 is_plt = HASHDEF(plt_entries,target);
885    
886                 // CodeSource-defined tests 
887                 is_nonret = obj().cs()->nonReturning(target);
888                 if (is_nonret) {
889                   parsing_printf("\t Disallowing FT edge: CodeSource reports nonreturning\n");
890                 }
891                 if(!is_nonret && is_plt) {
892                     is_nonret |= obj().cs()->nonReturning(plt_entries[target]);
893                             if (is_nonret) {
894                               parsing_printf("\t Disallowing FT edge: CodeSource reports PLT nonreturning\n");
895                             }
896                 }
897                 // Parsed return status tests
898                 if(!is_nonret && !is_plt && ct) {
899                     is_nonret |= (ct->retstatus() == NORETURN);
900                             if (is_nonret) {
901                               parsing_printf("\t Disallowing FT edge: function is non-returning\n");
902                             }
903                 }
904                 // Call-stack tampering tests
905                 if (unlikely(!is_nonret && frame.func->obj()->defensiveMode() && ct)) {
906                     is_nonret |= (ct->retstatus() == UNKNOWN);
907                     if (is_nonret) {
908                         parsing_printf("\t Disallowing FT edge: function in "
909                                        "defensive binary may not return\n");
910                         mal_printf("Disallowing FT edge: function %lx in "
911                                    "defensive binary may not return\n", ct->addr());
912                     } else {
913                         is_nonret |= (TAMPER_NONZERO == ct->tampersStack());
914                         is_nonret |= (TAMPER_ABS == ct->tampersStack());
915                         if (is_nonret) {
916                             mal_printf("Disallowing FT edge: function at %lx "
917                                        "tampers with its stack\n", ct->addr());
918                             parsing_printf("\t Disallowing FT edge: function "
919                                            "tampers with its stack\n");
920                         }
921                     }
922                 }
923                 if(is_nonret) {
924                     parsing_printf("[%s] no fallthrough for non-returning call "
925                                    "to %lx at %lx\n",FILE__,target,
926                                    work->edge()->src()->lastInsnAddr());
927
928                     // unlink tempsink fallthrough edge
929                     Edge * remove = work->edge();
930                     remove->src()->removeTarget(remove);
931                     factory().free_edge(remove);
932                     continue;
933                 }
934
935                 // Invalidate cache_valid for all sharing functions
936                 invalidateContainingFuncs(func, ce->src());                
937             }
938         } else if(work->order() == ParseWorkElem::seed_addr) {
939             cur = leadersToBlock[work->target()]; 
940         }
941                        
942         if(NULL == cur) { 
943             pair<Block*,Edge*> newedge =
944                 add_edge(frame,
945                          frame.func,
946                          work->edge()->src(),
947                          work->target(),
948                          work->edge()->type(),
949                          work->edge());
950             cur = newedge.first;
951         }
952
953         if(HASHDEF(visited,cur->start()))
954         {
955             parsing_printf("[%s] skipping locally parsed target at %lx\n",
956                 FILE__,work->target());
957             continue;
958         } 
959         visited[cur->start()] = true;
960         leadersToBlock[cur->start()] = cur;
961
962         if(!cur->_parsed)
963         {
964             parsing_printf("[%s] parsing block %lx\n",
965                 FILE__,cur->start());
966             if (frame.func->obj()->defensiveMode()) {
967                 mal_printf("new block at %lx (0x%lx)\n",cur->start(), cur);
968             }
969
970             cur->_parsed = true;
971             curAddr = cur->start();
972         } else {
973             parsing_printf("[%s] deferring parse of shared block %lx\n",
974                 FILE__,cur->start());
975             if (func->_rs < UNKNOWN) {
976                 // we've parsed into another function, if we've parsed
977                 // into it's entry point, set retstatus to match it
978                 Function * other_func = _parse_data->findFunc(
979                     func->region(), cur->start());
980                 if (other_func && other_func->retstatus() > UNKNOWN) {
981                     func->_rs = other_func->retstatus();
982                 }
983                 else {
984                     func->_rs = UNKNOWN;
985                 }
986             }
987             continue;
988         }
989
990         /*
991          * External consumers of parsing may have invoked finalizing
992          * methods from callback routines. Ensure that the caches
993          * are invalidated because a new block is being added to the
994          * function's view
995          */
996         func->_cache_valid = false;
997
998         // NB Using block's region() here because it may differ from the
999         //    function's if control flow has jumped to another region
1000         unsigned size = 
1001             cur->region()->offset() + cur->region()->length() - curAddr;
1002 #if defined(cap_instruction_api)
1003         const unsigned char* bufferBegin = 
1004             (const unsigned char *)(func->isrc()->getPtrToInstruction(curAddr));
1005         InstructionDecoder dec = InstructionDecoder(
1006             bufferBegin,size,frame.codereg->getArch());
1007         InstructionAdapter_t *ah = new InstructionAdapter_t(
1008             dec, curAddr, func->obj(), cur->region(), func->isrc(), cur);
1009 #else        
1010         InstrucIter iter(curAddr, size, func->isrc());
1011         InstructionAdapter_t ah(iter, 
1012             func->obj(), cur->region(), func->isrc());
1013 #endif        
1014
1015         using boost::tuples::tie;
1016         tie(nextBlockAddr,nextBlock) = get_next_block(
1017             frame.curAddr, frame.codereg, _parse_data);
1018
1019         bool isNopBlock = ah->isNop();
1020
1021         while(true) 
1022         {
1023             curAddr = ah->getAddr();
1024             /** Check for straight-line fallthrough **/
1025             if(curAddr == nextBlockAddr) {
1026                 parsing_printf("[%s] straight-line parse into block at %lx\n",
1027                     FILE__,curAddr);
1028                 if (frame.func->obj()->defensiveMode()) {
1029                     mal_printf("new block at %lx\n",curAddr);
1030                 }
1031                 ah->retreat();
1032                 curAddr = ah->getAddr();
1033
1034                 end_block(cur,*ah);
1035                 pair<Block*,Edge*> newedge =
1036                     add_edge(frame,frame.func,cur,
1037                              nextBlockAddr,FALLTHROUGH,NULL);
1038         
1039                 if(!HASHDEF(visited,nextBlockAddr) && 
1040                    !HASHDEF(leadersToBlock,nextBlockAddr)) { 
1041                     parsing_printf("[%s:%d] pushing %lx onto worklist\n",
1042                         FILE__,__LINE__,nextBlockAddr);
1043
1044                     frame.pushWork(
1045                         new ParseWorkElem(
1046                             NULL,
1047                             newedge.second,
1048                             nextBlockAddr,
1049                             true,
1050                             false)
1051                         );
1052                     leadersToBlock[nextBlockAddr] = nextBlock;
1053                 }
1054                 break;
1055             } else if(curAddr > nextBlockAddr) {
1056                 parsing_printf("[%s:%d] inconsistent instruction stream: "
1057                                "%lx is within [%lx,%lx)\n",
1058                     FILE__,__LINE__,curAddr,
1059                     nextBlock->start(),nextBlock->end());
1060
1061                 // NB "cur" hasn't ended, so its range may
1062                 // not look like it overlaps with nextBlock
1063                 _pcb.overlapping_blocks(cur,nextBlock);
1064
1065                 tie(nextBlockAddr,nextBlock) = 
1066                     get_next_block(frame.curAddr, frame.codereg, _parse_data);
1067             }
1068
1069             // per-instruction callback notification 
1070             ParseCallback::insn_details insn_det;
1071             insn_det.insn = ah;
1072              
1073                 parsing_printf("[%s:%d] curAddr 0x%lx \n",
1074                     FILE__,__LINE__,curAddr);
1075
1076             if (func->_is_leaf_function) {
1077             Address ret_addr;
1078                 func->_is_leaf_function = !(insn_det.insn->isReturnAddrSave(ret_addr));
1079                     parsing_printf("[%s:%d] leaf %d funcname %s \n",
1080                         FILE__,__LINE__,func->_is_leaf_function, func->name().c_str());
1081                 if (!func->_is_leaf_function) func->_ret_addr = ret_addr;       
1082             }
1083                 
1084             _pcb.instruction_cb(func,cur,curAddr,&insn_det);
1085
1086             if(isNopBlock && !ah->isNop()) {
1087                 ah->retreat();
1088
1089                 end_block(cur,*ah);
1090                 pair<Block*,Edge*> newedge =
1091                     add_edge(frame,frame.func,cur,curAddr,FALLTHROUGH,NULL);
1092                 Block * targ = newedge.first;
1093   
1094                 parsing_printf("[%s:%d] nop-block ended at %lx\n",
1095                     FILE__,__LINE__,curAddr); 
1096                 if(targ && !HASHDEF(visited,targ->start())) {
1097                     parsing_printf("[%s:%d] pushing %lx onto worklist\n",
1098                         FILE__,__LINE__,targ->start());
1099
1100                     frame.pushWork(
1101                         new ParseWorkElem(
1102                             NULL,
1103                             newedge.second,
1104                             targ->start(),
1105                             true,
1106                             false)
1107                         );
1108                     leadersToBlock[targ->start()] = targ; 
1109                 }
1110                 break;
1111             }
1112             
1113             /** Particular instruction handling (calls, branches, etc) **/
1114             ++num_insns; 
1115
1116             if(ah->hasCFT()) {
1117                ProcessCFInsn(frame,cur,*ah);
1118                break;
1119             }
1120             else if(func->_saves_fp && 
1121                     func->_no_stack_frame &&  
1122                     ah->isFrameSetupInsn()) // isframeSetup is expensive
1123             {
1124                 func->_no_stack_frame = false;
1125             }
1126             else if(ah->isLeave())
1127             {
1128                 func->_no_stack_frame = false;
1129             }
1130             else if( ah->isAbortOrInvalidInsn() )
1131             {
1132                 // 4. Invalid or `abort-causing' instructions
1133                 end_block(cur,*ah);
1134                 link(cur, _sink, DIRECT, true);
1135                 break; 
1136             }
1137             else if( ah->isInterruptOrSyscall() )
1138             {
1139                 // 5. Raising instructions
1140                 end_block(cur,*ah);
1141     
1142                 pair<Block*,Edge*> newedge =
1143                     add_edge(frame,frame.func,cur,ah->getNextAddr(),FALLTHROUGH,NULL);
1144                 Block * targ = newedge.first;
1145    
1146                 if(targ && !HASHDEF(visited,targ->start()) &&
1147                            !HASHDEF(leadersToBlock,targ->start())) {
1148                     parsing_printf("[%s:%d] pushing %lx onto worklist\n",
1149                         FILE__,__LINE__,targ->start());
1150
1151                     frame.pushWork(
1152                         new ParseWorkElem(
1153                             NULL,
1154                             newedge.second,
1155                             targ->start(),
1156                             true,
1157                             false)
1158                         );
1159                     leadersToBlock[targ->start()] = targ; 
1160                 }
1161                 if (unlikely(func->obj()->defensiveMode())) {
1162                     fprintf(stderr,"parsed bluepill insn sysenter or syscall "
1163                             "in defensive mode at %lx\n",curAddr);
1164                 }
1165                 break;
1166             }
1167             else if(unlikely(func->obj()->defensiveMode())) 
1168             {
1169                 if (!_pcb.hasWeirdInsns(func) && ah->isGarbageInsn())
1170                 {
1171                     // add instrumentation at this addr so we can
1172                     // extend the function if this really executes
1173                     // KEVINTODO: the weirdInsns flag really ought to be associated with the block so we can halt parsing along multiple function paths
1174                     ParseCallback::default_details det(
1175                         (unsigned char*) cur->region()->getPtrToInstruction(cur->lastInsnAddr()),
1176                         cur->end() - cur->lastInsnAddr(),
1177                         true);
1178                     _pcb.abruptEnd_cf(cur->lastInsnAddr(),cur,&det);
1179                     _pcb.foundWeirdInsns(func);
1180                     end_block(cur,*ah);
1181                     break;
1182                 }
1183                 else if (ah->isNopJump()) 
1184                 {
1185                     // patch the jump to make it a nop, and re-set the 
1186                     // instruction adapter so we parse the instruction
1187                     // as a no-op this time, allowing the subsequent
1188                     // instruction to be parsed correctly
1189                     mal_printf("Nop jump at %lx, changing it to nop\n",ah->getAddr());
1190                     _pcb.patch_nop_jump(ah->getAddr());
1191                     unsigned bufsize = 
1192                         func->region()->offset() + func->region()->length() - ah->getAddr();
1193 #if defined(cap_instruction_api)
1194                     const unsigned char* bufferBegin = (const unsigned char *)
1195                         (func->isrc()->getPtrToInstruction(ah->getAddr()));
1196                     delete ah;
1197                     dec = InstructionDecoder
1198                         (bufferBegin, bufsize, frame.codereg->getArch());
1199                     ah = new InstructionAdapter_t(dec, curAddr, func->obj(), 
1200                                               func->region(), func->isrc(), cur);
1201 #else        
1202                     assert(0 && "This case is only possible on x86"); 
1203 #endif
1204                 }
1205                 else {
1206                     entryID id = ah->getInstruction()->getOperation().getID();
1207                     switch (id) {
1208                         case e_rdtsc:
1209                             fprintf(stderr,"parsed bluepill insn rdtsc at %lx\n",curAddr);
1210                             break;
1211                         case e_sldt:
1212                             fprintf(stderr,"parsed bluepill insn sldt at %lx\n",curAddr);
1213                             break;
1214                         default:
1215                             break;
1216                     }
1217                 }
1218             }
1219             else {
1220                 // default
1221             }
1222
1223             /** Check for overruns of valid address space **/
1224             if(!is_code(func,ah->getNextAddr()))
1225             {
1226                 parsing_printf("[%s] next insn %lx is invalid\n",
1227                     FILE__,ah->getNextAddr());
1228
1229                 end_block(cur,*ah);
1230                 break;
1231             }
1232             else if(!cur->region()->contains(ah->getNextAddr()))
1233             {
1234                 parsing_printf("[%s] next address %lx is outside [%lx,%lx)\n",
1235                     FILE__,ah->getNextAddr(),
1236                     cur->region()->offset(),
1237                     cur->region()->offset()+cur->region()->length());
1238                 end_block(cur,*ah);
1239                 break;
1240             }
1241             ah->advance();
1242         }
1243         delete ah;
1244     }
1245
1246     /** parsing complete **/
1247     if(HASHDEF(plt_entries,frame.func->addr())) {
1248        if(obj().cs()->nonReturning(frame.func->addr())) {
1249           frame.func->_rs = NORETURN;
1250        }
1251        else
1252           frame.func->_rs = UNKNOWN; 
1253
1254        // Convenience -- adopt PLT name
1255        frame.func->_name = plt_entries[frame.func->addr()];
1256     }
1257     else if(frame.func->_rs == UNSET) {
1258         frame.func->_rs = NORETURN;
1259     }
1260     frame.set_status(ParseFrame::PARSED);
1261 }
1262
1263 void
1264 Parser::end_block(Block * b, InstructionAdapter_t & ah)
1265 {
1266     b->_lastInsn = ah.getAddr();
1267     b->_end = ah.getNextAddr();
1268
1269     record_block(b);
1270 }
1271
1272 void
1273 Parser::record_block(Block *b)
1274 {
1275     parsing_printf("[%s:%d] recording block [%lx,%lx)\n",
1276         FILE__,__LINE__,b->start(),b->end());
1277     _parse_data->record_block(b->region(),b);
1278 }
1279
1280
1281 Block *
1282 Parser::block_at(
1283     Function * owner,
1284     Address addr,
1285     Block * & split)
1286 {
1287     set<Block *> overlap;
1288     Block * exist = NULL;
1289     Block * ret = NULL;
1290     Block * inconsistent = NULL;
1291     Address prev_insn = 0;
1292
1293     split = NULL;
1294
1295     CodeRegion *cr;
1296     if(owner->region()->contains(addr))
1297         cr = owner->region();
1298     else
1299         cr = _parse_data->reglookup(owner->region(),addr);
1300
1301     if(!is_code(owner,addr)) {
1302         parsing_printf("[%s] block address %lx rejected by isCode()\n",
1303             FILE__,addr);
1304         return NULL;
1305     }
1306
1307     if(NULL == (exist = _parse_data->findBlock(cr,addr))) {
1308         _parse_data->findBlocks(cr,addr,overlap);
1309         if(overlap.size() > 1)
1310             parsing_printf("[%s] address %lx overlapped by %d blocks\n",
1311                 FILE__,addr,overlap.size());
1312
1313         /* Platform specific consistency test: 
1314            generally checking for whether the address is an
1315            instruction boundary in a block */
1316         for(set<Block *>::iterator sit=overlap.begin();sit!=overlap.end();++sit)
1317         {
1318             Block * b = *sit;
1319
1320             // consistent will fill in prev_insn with the address of the
1321             // instruction preceeding addr if addr is consistent
1322             if(b->consistent(addr,prev_insn)) {
1323                 exist = b;
1324                 break;   
1325             } else {
1326                 parsing_printf("[%s] %lx is inconsistent with [%lx,%lx)\n", 
1327                     FILE__,addr,b->start(),b->end());
1328                 if(inconsistent) {
1329                     parsing_printf("   multiple inconsistent blocks!\n");
1330                 }
1331                 inconsistent = b;
1332             }
1333         }
1334     }
1335
1336     if(exist) {
1337         if(exist->start() == addr) {
1338             parsing_printf("[%s] block %lx exists\n",
1339                 FILE__,addr);
1340             ret = exist;
1341         }
1342         else {
1343             parsing_printf("[%s] address %lx splits [%lx,%lx) (%p)\n",
1344                FILE__,addr,exist->start(),exist->end(),exist);
1345             if (owner->obj()->defensiveMode()) {
1346                 mal_printf("new block at %lx splits [%lx %lx)\n",
1347                            addr, exist->start(), exist->end());
1348             }
1349             split = exist;
1350             ret = split_block(owner,exist,addr,prev_insn);
1351         }
1352     } else {
1353         ret = factory().mkblock(owner,cr,addr);
1354         record_block(ret);
1355     }
1356
1357     if(unlikely(inconsistent)) {
1358        _pcb.overlapping_blocks(ret,inconsistent); 
1359     }
1360
1361     return ret;
1362 }
1363
1364 pair<Block *,Edge *>
1365 Parser::add_edge(
1366     ParseFrame & frame,
1367     Function * owner,
1368     Block * src,
1369     Address dst,
1370     EdgeTypeEnum et,
1371     Edge * exist)
1372 {
1373     Block * split = NULL;
1374     Block * ret = NULL;
1375     Edge * newedge = NULL;
1376     pair<Block *, Edge *> retpair((Block *) NULL, (Edge *) NULL);
1377
1378     if(!is_code(owner,dst)) {
1379         parsing_printf("[%s] target address %lx rejected by isCode()\n",dst);
1380         return retpair;
1381     }
1382
1383     ret = block_at(owner,dst,split);
1384     retpair.first = ret;
1385
1386     if(split == src) {
1387         // special case -- same block
1388         src = ret;
1389     }
1390
1391     if(split && HASHDEF(frame.visited,split->start())) {
1392         // prevent "delayed parsing" of extant block if 
1393         // this frame has already visited it
1394         frame.visited[ret->start()] = true;
1395         frame.leadersToBlock[ret->start()] = ret;
1396     }
1397
1398     if(NULL == exist) {
1399         newedge = link(src,ret,et,false);
1400         retpair.second = newedge;
1401     } else {
1402         relink(exist,src,ret);
1403         retpair.second = exist;
1404     }
1405
1406     return retpair;
1407 }
1408
1409 Block *
1410 Parser::split_block(
1411     Function * owner, 
1412     Block *b, 
1413     Address addr, 
1414     Address previnsn)
1415 {
1416     Block * ret;
1417     CodeRegion * cr;
1418     if(owner->region()->contains(b->start()))
1419         cr = owner->region();
1420     else
1421         cr = _parse_data->reglookup(owner->region(),b->start());
1422     region_data * rd = _parse_data->findRegion(cr);
1423
1424     // enable for extra-safe testing, but callers are responsbible
1425     // assert(b->consistent(addr);
1426
1427     ret = factory().mkblock(owner,cr,addr);
1428
1429     // move out edges
1430     vector<Edge *> & trgs = b->_targets;
1431     vector<Edge *>::iterator tit = trgs.begin(); 
1432     for(;tit!=trgs.end();++tit) {
1433         Edge *e = *tit;
1434         e->_source = ret;
1435         ret->_targets.push_back(e);
1436     }
1437     trgs.clear();
1438     ret->_end = b->_end;
1439     ret->_lastInsn = b->_lastInsn;
1440     ret->_parsed = true;
1441     link(b,ret,FALLTHROUGH,false); 
1442
1443     record_block(ret);
1444
1445     // b's range has changed
1446     rd->blocksByRange.remove(b);
1447     b->_end = addr;
1448     b->_lastInsn = previnsn;
1449     rd->blocksByRange.insert(b); 
1450     // Any functions holding b that have already been finalized
1451     // need to have their caches invalidated so that they will
1452     // find out that they have this new 'ret' block
1453     std::set<Function*> prev_owners;
1454     rd->findFuncs(b->start(),prev_owners);
1455     for(std::set<Function*>::iterator oit = prev_owners.begin();
1456         oit != prev_owners.end(); ++oit)
1457     {
1458         Function * po = *oit;
1459         po->_cache_valid = false;
1460         parsing_printf("[%s:%d] split of [%lx,%lx) invalidates cache of "
1461                 "func at %lx\n",
1462         FILE__,__LINE__,b->start(),b->end(),po->addr());
1463     }
1464
1465     // if we're re-parsing in this function, inform user program of the split
1466     _pcb.block_split(b,ret);
1467
1468     return ret;
1469  }
1470
1471 pair<Function *,Edge*>
1472 Parser::bind_call(ParseFrame & frame, Address target, Block * cur, Edge * exist)
1473 {
1474     Function * tfunc = NULL;
1475     Block * tblock = NULL;
1476     FuncSource how = RT;
1477     if(frame.func->_src == GAP || frame.func->_src == GAPRT)
1478         how = GAPRT;
1479
1480     // look it up
1481     tfunc = _parse_data->get_func(frame.codereg,target,how);
1482     if(!tfunc) {
1483         parsing_printf("[%s:%d] can't bind call to %lx\n",
1484             FILE__,__LINE__,target);
1485         return pair<Function*,Edge*>((Function *) NULL,exist);
1486     }
1487
1488     // add an edge
1489     pair<Block*,Edge*> ret = add_edge(frame,tfunc,cur,target,CALL,exist);
1490     tblock = ret.first;
1491     if(!tblock) {
1492         parsing_printf("[%s:%d] can't bind call to %lx\n",
1493             FILE__,__LINE__,target);
1494         return pair<Function*,Edge*>((Function *) NULL,exist);
1495     }
1496
1497     return pair<Function*,Edge*>(tfunc,ret.second);
1498 }
1499
1500 Function *
1501 Parser::findFuncByEntry(CodeRegion *r, Address entry)
1502 {
1503     if(_parse_state < PARTIAL) {
1504         parsing_printf("[%s:%d] Parser::findFuncByEntry([%lx,%lx),%lx) "
1505                        "forced parsing\n",
1506             FILE__,__LINE__,r->low(),r->high(),entry);
1507         parse();
1508     }
1509     return _parse_data->findFunc(r,entry);
1510 }
1511
1512 int 
1513 Parser::findFuncs(CodeRegion *r, Address addr, set<Function *> & funcs)
1514 {
1515     if(_parse_state < COMPLETE) {
1516         parsing_printf("[%s:%d] Parser::findFuncs([%lx,%lx),%lx,...) "
1517                        "forced parsing\n",
1518             FILE__,__LINE__,r->low(),r->high(),addr);
1519         parse();
1520     }
1521     if(_parse_state < FINALIZED) {
1522         parsing_printf("[%s:%d] Parser::findFuncs([%lx,%lx),%lx,...) "
1523                        "forced finalization\n",
1524             FILE__,__LINE__,r->low(),r->high(),addr);
1525         finalize();
1526     }
1527     return _parse_data->findFuncs(r,addr,funcs);
1528 }
1529
1530 int 
1531 Parser::findFuncs(CodeRegion *r, Address start, Address end, set<Function *> & funcs)
1532 {
1533     if(_parse_state < COMPLETE) {
1534         parsing_printf("[%s:%d] Parser::findFuncs([%lx,%lx),%lx,%lx) "
1535                        "forced parsing\n",
1536             FILE__,__LINE__,r->low(),r->high(),start,end);
1537         parse();
1538     }
1539     if(_parse_state < FINALIZED) {
1540         parsing_printf("[%s:%d] Parser::findFuncs([%lx,%lx),%lx,%lx) "
1541                        "forced finalization\n",
1542             FILE__,__LINE__,r->low(),r->high(),start,end);
1543         finalize();
1544     }
1545     return _parse_data->findFuncs(r,start,end,funcs);
1546 }
1547
1548 Block *
1549 Parser::findBlockByEntry(CodeRegion *r, Address entry)
1550 {
1551     if(_parse_state < PARTIAL) {
1552         parsing_printf("[%s:%d] Parser::findBlockByEntry([%lx,%lx),%lx) "
1553                        "forced parsing\n",
1554             FILE__,__LINE__,r->low(),r->high(),entry);
1555         parse();
1556     }
1557     return _parse_data->findBlock(r,entry);
1558 }
1559
1560 Block *
1561 Parser::findNextBlock(CodeRegion *r, Address addr)
1562 {
1563     if(_parse_state < PARTIAL) {
1564         parsing_printf("[%s:%d] Parser::findBlockByEntry([%lx,%lx),%lx) "
1565                        "forced parsing\n",
1566             FILE__,__LINE__,r->low(),r->high(),addr);
1567         parse();
1568     }
1569     return _parse_data->findRegion(r)->get_next_block(addr).second;
1570 }
1571
1572 int
1573 Parser::findBlocks(CodeRegion *r, Address addr, set<Block *> & blocks)
1574 {
1575     if(_parse_state < COMPLETE) {
1576         parsing_printf("[%s:%d] Parser::findBlocks([%lx,%lx),%lx,...) "
1577                        "forced parsing\n",
1578             FILE__,__LINE__,r->low(),r->high(),addr);
1579         parse();
1580     }
1581     return _parse_data->findBlocks(r,addr,blocks); 
1582 }
1583
1584 Edge*
1585 Parser::link(Block *src, Block *dst, EdgeTypeEnum et, bool sink)
1586 {
1587     assert(et != NOEDGE);
1588     Edge * e = factory().mkedge(src,dst,et);
1589     e->_type._sink = sink;
1590     src->_targets.push_back(e);
1591     dst->_sources.push_back(e);
1592     return e;
1593 }
1594
1595 /* 
1596  * During parsing, all edges are temporarily linked to the _tempsink
1597  * block. Adding and then removing edges to and from this block is
1598  * wasteful, especially given that removal is an O(N) operation with
1599  * vector storage. This call thus does not record edges into the sink.
1600  * These edges are guaranteed to have been relinked when parsing is
1601  * in state COMPLETE.
1602  *
1603  * NB This introduces an inconsistency into the CFG invariant that all
1604  *    targets of an edge have that edge in their source list if the
1605  *    data structures are queried during parsing. Extenders of 
1606  *    parsing callbacks are the only ones who can see this state;
1607  *    they have been warned and should know better.
1608  */
1609 Edge*
1610 Parser::link_tempsink(Block *src, EdgeTypeEnum et)
1611 {
1612     Edge * e = factory().mkedge(src,_sink,et);
1613     e->_type._sink = true;
1614     src->_targets.push_back(e);
1615     return e;
1616 }
1617
1618 void
1619 Parser::relink(Edge * e, Block *src, Block *dst)
1620 {
1621     if(src != e->src()) {
1622         e->src()->removeTarget(e);
1623         e->_source = src;
1624         src->addTarget(e);
1625     }
1626     if(dst != e->trg()) { 
1627         if(e->trg() != _sink)
1628             e->trg()->removeSource(e);
1629         e->_target = dst;
1630         dst->addSource(e);
1631     }
1632
1633     e->_type._sink = (dst == _sink);
1634 }
1635
1636 ParseFrame::Status
1637 Parser::frame_status(CodeRegion * cr, Address addr)
1638 {
1639     // XXX parsing frames may have been cleaned up, but status
1640     //     is always cached
1641     return _parse_data->frameStatus(cr,addr);
1642 }
1643
1644 void
1645 Parser::remove_func(Function *func)
1646 {
1647     if (sorted_funcs.end() != sorted_funcs.find(func)) {
1648         sorted_funcs.erase(func);
1649     }
1650     if (HINT == func->src()) {
1651         for (unsigned fidx=0; fidx < hint_funcs.size(); fidx++) {
1652             if (hint_funcs[fidx] == func) {
1653                 hint_funcs[fidx] = hint_funcs[hint_funcs.size()-1];
1654                 hint_funcs.pop_back();
1655                 break;
1656             }
1657         }
1658     }
1659     else {
1660         for (unsigned fidx=0; fidx < discover_funcs.size(); fidx++) {
1661             if (discover_funcs[fidx] == func) {
1662                 discover_funcs[fidx] = discover_funcs[discover_funcs.size()-1];
1663                 discover_funcs.pop_back();
1664                 break;
1665             }
1666         }
1667     }
1668     
1669     _parse_data->remove_func(func);
1670 }
1671
1672 void
1673 Parser::remove_block(Dyninst::ParseAPI::Block *block)
1674 {
1675     if (block->containingFuncs() == 1) {
1676         _pcb.block_delete(block);
1677     }
1678     _parse_data->remove_block(block);
1679 }
1680
1681 void Parser::move_func(Function *func, Address new_entry, CodeRegion *new_reg)
1682 {
1683     region_data *reg_data = _parse_data->findRegion(func->region());
1684     reg_data->funcsByAddr.erase(func->addr());
1685
1686     reg_data = _parse_data->findRegion(new_reg);
1687     reg_data->funcsByAddr[new_entry] = func;
1688 }
1689
1690
1691 /* called in defensive mode to create parseFrames at tampered addresses 
1692    for functions that return TAMPER_ABS. */
1693 ParseFrame * 
1694 Parser::getTamperAbsFrame(Function *tamperFunc)
1695 {
1696     assert(TAMPER_ABS == tamperFunc->tampersStack());
1697     Function * targFunc = NULL;
1698
1699     // get the binary's load address and subtract it
1700     Address loadAddr = 0;
1701     if ( ! _pcb.loadAddr(tamperFunc->_tamper_addr, loadAddr) ) {
1702         parsing_printf("WARNING: Failed to find object load address "
1703                        "for tampered return address 0x%lx\n", 
1704                        tamperFunc->_tamper_addr);
1705         mal_printf("WARNING: Failed to find object load address "
1706                    "for tampered return address 0x%lx\n", 
1707                    tamperFunc->_tamper_addr);
1708         tamperFunc->_tamper = TAMPER_NONZERO;
1709         return NULL; // failed to find object load address
1710     }
1711     Address target = tamperFunc->_tamper_addr - loadAddr;
1712
1713     targFunc = _parse_data->get_func
1714         (tamperFunc->region(), 
1715          target, 
1716          tamperFunc->src());
1717
1718     if (!targFunc) {
1719         targFunc = _parse_data->get_func(tamperFunc->region(),target,RT);
1720     }
1721
1722     if(!targFunc) {
1723         mal_printf("ERROR: could not create function at tamper "
1724                    "addr %lx\n",target);
1725         return NULL;
1726     }
1727
1728     ParseFrame * pf = NULL;
1729     CodeRegion *reg = targFunc->region();
1730
1731     ParseFrame::Status exist = _parse_data->frameStatus(reg, target);
1732     switch(exist) {
1733     case ParseFrame::FRAME_ERROR:
1734     case ParseFrame::PROGRESS:
1735         fprintf(stderr,"ERROR: function frame at %lx in bad state, can't "
1736                 "add edge; status=%d\n",target, exist);
1737         return NULL;
1738         break;
1739     case ParseFrame::PARSED:
1740         fprintf(stderr,"ERROR: function frame at %lx already parsed, can't "
1741                 "add edge; status=%d\n",target, exist);
1742         return NULL;
1743         break;
1744     case ParseFrame::BAD_LOOKUP:
1745         // create new frame
1746         pf = _parse_data->findFrame(reg, target);
1747         assert( !pf );
1748         pf = new ParseFrame(targFunc,_parse_data);
1749         break;
1750     case ParseFrame::UNPARSED:
1751     case ParseFrame::CALL_BLOCKED:
1752         pf = _parse_data->findFrame(reg, target);
1753         if ( !pf ) {
1754             fprintf(stderr,"ERROR: no function frame at %lx for frame "
1755                     "that should exist, can't add edge; status=%d\n",
1756                     target, exist);
1757             return NULL;
1758         }
1759         break;
1760     default:
1761         assert(0);
1762     }
1763         
1764     // make a temp edge
1765     Function::blocklist & ret_blks = tamperFunc->returnBlocks();
1766     for (Function::blocklist::iterator bit = ret_blks.begin(); 
1767          bit != ret_blks.end(); 
1768          bit++)
1769     {
1770         Edge *edge = link_tempsink(*bit, CALL);
1771
1772         // create new bundle since we're not adding CALL,CALL_FT edge pairs
1773         ParseWorkBundle *bundle = new ParseWorkBundle();
1774         pf->work_bundles.push_back(bundle);
1775         bundle->add(
1776             new ParseWorkElem(
1777                 bundle,
1778                 edge,
1779                 target,
1780                 true,
1781                 true)
1782           );
1783     }
1784
1785     return pf;
1786 }
1787
1788 void Parser::invalidateContainingFuncs(Function *owner, Block *b)
1789 {
1790     CodeRegion * cr;
1791     if(owner->region()->contains(b->start()))
1792         cr = owner->region();
1793     else
1794         cr = _parse_data->reglookup(owner->region(),b->start());
1795     region_data * rd = _parse_data->findRegion(cr);
1796     
1797
1798     // Any functions holding b that have already been finalized
1799     // need to have their caches invalidated so that they will
1800     // find out that they have this new 'ret' block
1801     std::set<Function*> prev_owners;
1802     rd->findFuncs(b->start(),prev_owners);
1803     for(std::set<Function*>::iterator oit = prev_owners.begin();
1804         oit != prev_owners.end(); ++oit)
1805     {
1806         Function * po = *oit;
1807         po->_cache_valid = false;
1808         parsing_printf("[%s:%d] split of [%lx,%lx) invalidates cache of "
1809                        "func at %lx\n",
1810                        FILE__,__LINE__,b->start(),b->end(),po->addr());
1811     }   
1812 }