1. If finalizing a function needs to invoke a parse, it should invoke
[dyninst.git] / parseAPI / src / Parser.C
1 /*
2  * See the dyninst/COPYRIGHT file for copyright information.
3  * 
4  * We provide the Paradyn Tools (below described as "Paradyn")
5  * on an AS IS basis, and do not warrant its validity or performance.
6  * We reserve the right to update, modify, or discontinue this
7  * software at any time.  We shall have no obligation to supply such
8  * updates or modifications or any other form of support to you.
9  * 
10  * By your use of Paradyn, you understand and agree that we (or any
11  * other person or entity with proprietary rights in Paradyn) are
12  * under no obligation to provide either maintenance services,
13  * update services, notices of latent defects, or correction of
14  * defects for Paradyn.
15  * 
16  * This library is free software; you can redistribute it and/or
17  * modify it under the terms of the GNU Lesser General Public
18  * License as published by the Free Software Foundation; either
19  * version 2.1 of the License, or (at your option) any later version.
20  * 
21  * This library is distributed in the hope that it will be useful,
22  * but WITHOUT ANY WARRANTY; without even the implied warranty of
23  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
24  * Lesser General Public License for more details.
25  * 
26  * You should have received a copy of the GNU Lesser General Public
27  * License along with this library; if not, write to the Free Software
28  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
29  */
30
31 #include <omp.h>
32
33 #include <vector>
34 #include <limits>
35 #include <algorithm>
36 // For Mutex
37 #define PROCCONTROL_EXPORTS
38
39 #include "dyntypes.h"
40
41 #include "CodeObject.h"
42 #include "CFGFactory.h"
43 #include "ParseCallback.h"
44 #include "Parser.h"
45 #include "Parser.h"
46 #include "CFG.h"
47 #include "util.h"
48 #include "debug_parse.h"
49
50 #include <boost/bind/bind.hpp>
51
52
53 #include <boost/timer/timer.hpp>
54 #include <fstream>
55
56 #ifdef ENABLE_RACE_DETECTION
57 #define USE_CILK 1
58 #else
59 #define USE_OPENMP 1
60 #endif
61
62 #if USE_CILK
63 #include <cilk/cilk.h>
64 #include <cilk/cilk_api.h>
65 #endif
66
67 #include "race-detector-annotations.h"
68
69 using namespace std;
70 using namespace Dyninst;
71 using namespace Dyninst::ParseAPI;
72 using namespace Dyninst::InstructionAPI;
73
74 typedef std::pair< Address, EdgeTypeEnum > edge_pair_t;
75 typedef vector< edge_pair_t > Edges_t;
76
77 #include "common/src/dthread.h"
78
79 namespace {
80     struct less_cr {
81         bool operator()(CodeRegion * x, CodeRegion * y)
82         {
83             return x->offset() < y->offset();
84         }
85     };
86 }
87
88 Parser::Parser(CodeObject & obj, CFGFactory & fact, ParseCallbackManager & pcb) :
89         _obj(obj),
90         _cfgfact(fact),
91         _pcb(pcb),
92         _parse_data(NULL),
93         _parse_state(UNPARSED),
94         _in_parse(false),
95         _in_finalize(false)
96 {
97     delayed_frames.size = 0;
98     // cache plt entries for fast lookup
99     const map<Address, string> & lm = obj.cs()->linkage();
100     map<Address, string>::const_iterator lit = lm.begin();
101     for( ; lit != lm.end(); ++lit) {
102         parsing_printf("Cached PLT entry %s (%lx)\n",lit->second.c_str(),lit->first);
103         plt_entries[lit->first] = lit->second;
104     }
105
106     if(obj.cs()->regions().empty()) {
107         parsing_printf("[%s:%d] CodeSource provides no CodeRegions"
108                                " -- unparesable\n",
109                        FILE__,__LINE__);
110         _parse_state = UNPARSEABLE;
111         return;
112     }
113
114     // check whether regions overlap
115     vector<CodeRegion *> const& regs = obj.cs()->regions();
116     vector<CodeRegion *> copy(regs.begin(),regs.end());
117     sort(copy.begin(),copy.end(),less_cr());
118
119     // allocate a sink block -- region is arbitrary
120     _sink = new Block(&_obj, regs[0], std::numeric_limits<Address>::max());
121
122     bool overlap = false;
123     CodeRegion * prev = copy[0], *cur = NULL;
124     for(unsigned i=1;i<copy.size();++i) {
125         cur = copy[i];
126         if(cur->offset() < prev->offset() + prev->length()) {
127             parsing_printf("Overlapping code regions [%lx,%lx) and [%lx,%lx)\n",
128                            prev->offset(),prev->offset()+prev->length(),
129                            cur->offset(),cur->offset()+cur->length());
130             overlap = true;
131             break;
132         }
133     }
134
135     if(overlap)
136         _parse_data = new OverlappingParseData(this,copy);
137     else
138         _parse_data = new StandardParseData(this);
139 }
140
// A frame may be destroyed in any state, so cleanup() runs
// unconditionally here.
ParseFrame::~ParseFrame()
{
    cleanup(); // error'd frames still need cleanup
}
145
146 Parser::~Parser()
147 {
148     if(_parse_data)
149         delete _parse_data;
150
151     for(auto fit = frames.begin() ; fit != frames.end(); ++fit)
152         delete *fit;
153
154     frames.clear();
155 }
156
157 void
158 Parser::add_hint(Function * f)
159 {
160     if(!_parse_data->findFunc(f->region(),f->addr()))
161         record_func(f);
162 }
163
// Render a pair as "first, second" via stream insertion.  Used to emit
// the parse-frame timing histogram as CSV rows.
//
// @param pr  any pair-like object with streamable first/second members
// @return    "<first>, <second>"
template <typename T>
std::string pair_to_string(const T& pr)
{
    // Pass by const reference: the original took the pair by value,
    // copying it on every call (performance-unnecessary-value-param).
    std::stringstream s;
    s << pr.first << ", " << pr.second;
    return s.str();
}
171
172
// Top-level entry point: parse everything reachable from the recorded
// hint functions, then finalize.  Idempotent once _parse_state has
// reached COMPLETE; a no-op for UNPARSEABLE objects (set by the
// constructor when the CodeSource has no regions).
void
Parser::parse()
{
    parsing_printf("[%s:%d] parse() called on Parser %p with state %d\n",
                   FILE__,__LINE__,this, _parse_state);

    // For modification: once we've full-parsed once, don't do it again
    if (_parse_state >= COMPLETE) return;

    if(_parse_state == UNPARSEABLE)
        return;

    // Serialize whole-object parses.  _in_parse is set while holding the
    // lock; the commented-out assert suggests reentry was once disallowed.
    ScopeLock<Mutex<true> > L(parse_mutex);
//    assert(!_in_parse);
    _in_parse = true;

    parse_vanilla();
    finalize();
    // anything else by default...?

    if(_parse_state < COMPLETE)
        _parse_state = COMPLETE;

    _in_parse = false;
    parsing_printf("[%s:%d] parsing complete for Parser %p with state %d\n", FILE__, __LINE__, this, _parse_state);
#ifdef ADD_PARSE_FRAME_TIMERS
    // Dump the per-frame parse-time histogram (populated in
    // ProcessOneFrame) as a two-column CSV.
    std::ofstream stat_log("functions.csv");
    stat_log << "Results for " << time_histogram.size() << " buckets\n";
    stat_log << "usecs,count\n";
    std::transform(time_histogram.begin(), time_histogram.end(),
                   std::ostream_iterator<std::string >(stat_log, "\n"),
                   pair_to_string<std::pair<unsigned int, unsigned int> >);
#endif
}
207
// Parse the function entered at `target` within `region`.
//
// Creates (or reuses) the Function record and a ParseFrame for it, then
// drives parse_frames() on that single seed.
//
// @param region     region expected to contain `target`; bails if not
// @param target     entry address of the function to parse
// @param recursive  whether discovered callees are parsed as well
// @param src        provenance tag for a newly created function
void
Parser::parse_at(
        CodeRegion * region,
        Address target,
        bool recursive,
        FuncSource src)
{
    Function *f;
    ParseFrame *pf;
    LockFreeQueue<ParseFrame *> work;

    parsing_printf("[%s:%d] entered parse_at([%lx,%lx),%lx)\n",
                   FILE__,__LINE__,region->low(),region->high(),target);

    if(!region->contains(target)) {
        parsing_printf("\tbad address, bailing\n");
        return;
    }

    if(_parse_state < PARTIAL)
        _parse_state = PARTIAL;

    // get_func creates the function record if it does not already exist.
    f = _parse_data->get_func(region,target,src);
    if(!f) {
        parsing_printf("   could not create function at %lx\n",target);
        return;
    }

    // Skip functions already parsed or in some in-progress state.
    ParseFrame::Status exist = _parse_data->frameStatus(region,target);
    if(exist != ParseFrame::BAD_LOOKUP && exist != ParseFrame::UNPARSED) {
        parsing_printf("   function at %lx already parsed, status %d\n",
                       target, exist);
        return;
    }

    // Reuse a recorded frame if one exists; otherwise seed a fresh one.
    if(!(pf = _parse_data->findFrame(region,target))) {
        pf = new ParseFrame(f,_parse_data);
        init_frame(*pf);
        frames.insert(pf);
        _parse_data->record_frame(pf);
    }

    work.insert(pf);
    parse_frames(work,recursive);

    // downgrade state if necessary
    if(_parse_state > COMPLETE)
        _parse_state = COMPLETE;

}
258
259 void
260 Parser::parse_at(Address target, bool recursive, FuncSource src)
261 {
262     CodeRegion * region = NULL;
263
264     parsing_printf("[%s:%d] entered parse_at(%lx)\n",FILE__,__LINE__,target);
265
266     if(_parse_state == UNPARSEABLE)
267         return;
268
269     StandardParseData * spd = dynamic_cast<StandardParseData *>(_parse_data);
270     if(!spd) {
271         parsing_printf("   parse_at is invalid on overlapping regions\n");
272         return;
273     }
274
275     region = spd->reglookup(region,target); // input region ignored for SPD
276     if(!region) {
277         parsing_printf("   failed region lookup at %lx\n",target);
278         return;
279     }
280
281     parse_at(region,target,recursive,src);
282 }
283
284 void
285 Parser::parse_vanilla()
286 {
287     LockFreeQueue<ParseFrame *> work;
288
289     parsing_printf("[%s:%d] entered parse_vanilla()\n",FILE__,__LINE__);
290     parsing_printf("\t%d function hints\n",hint_funcs.size());
291
292     if(_parse_state < PARTIAL)
293         _parse_state = PARTIAL;
294     else
295         parsing_printf("\tparse state is %d, some parsing already done\n",
296                        _parse_state);
297
298     /* Initialize parse frames from hints */
299
300     // Note: there is no fundamental obstacle to parallelizing this loop. However,
301     // race conditions need to be resolved in supporting laysrs first.
302     for (unsigned int i = 0; i < hint_funcs.size(); i++) {
303         Function * hf = hint_funcs[i];
304         ParseFrame::Status test = frame_status(hf->region(),hf->addr());
305         if(test != ParseFrame::BAD_LOOKUP &&
306            test != ParseFrame::UNPARSED)
307         {
308             parsing_printf("\tskipping repeat parse of %lx [%s]\n",
309                            hf->addr(),hf->name().c_str());
310             continue;
311         }
312
313         ParseFrame *pf = new ParseFrame(hf,_parse_data);
314         init_frame(*pf);
315         frames.insert(pf);
316         work.insert(pf);
317         _parse_data->record_frame(pf);
318     }
319
320     parse_frames(work,true);
321 }
322
// Parse a batch of externally supplied work elements (edges to follow),
// e.g. from instrumentation-time discovery.  Groups the elements into
// ParseFrames keyed by the source block, completes call/fallthrough
// bundles, then runs the frames and finalizes.
void
Parser::parse_edges( vector< ParseWorkElem * > & work_elems )
{
    if(_parse_state == UNPARSEABLE)
        return;

    // build up set of needed parse frames and load them with work elements
    set<ParseFrame*> frameset; // for dup checking
    LockFreeQueue<ParseFrame*> frames;

    for (unsigned idx=0; idx < work_elems.size(); idx++) {

        ParseWorkElem *elem = work_elems[idx];
        Block *src = elem->edge()->src();

        if (elem->order() == ParseWorkElem::call_fallthrough)
        {
            // A call-fallthrough element must be paired with the CALL edge
            // out of the same block; search the source block's targets,
            // preferring a non-sink CALL edge over a sink one.
            Edge *callEdge = NULL;
            boost::lock_guard<Block> g(*src);
            Block::edgelist trgs = src->targets();
            for (Block::edgelist::iterator eit = trgs.begin();
                 eit != trgs.end();
                 eit++)
            {
                if ((*eit)->type() == CALL) {
                    callEdge = *eit;
                    if (!(*eit)->sinkEdge()) // if it's a sink edge, look for nonsink CALL
                        break;
                }
            }
            // create a call work elem so that the bundle is complete
            // and set the target function's return status and 
            // tamper to RETURN and TAMPER_NONE, respectively
            //assert(callEdge);

            // When we have a direct call to unallocated memory or garbage,
            // there is no call edge when the call is first executed. I'm
            // not sure why not, so I'm attempting to continue past here without
            // fixing up the called function...
            // Also, in the case I saw, the callee was _not_ returning. Not directly. It 
            // led into a big case with a longjmp() equivalent. 

            if (callEdge)
            {
                bool isResolvable = false;
                Address callTarget = 0;
                if ( ! callEdge->sinkEdge() )
                {
                    isResolvable = true;
                    callTarget = callEdge->trg_addr();
                    // the call target may be in another Code Object
                    Function *callee = callEdge->trg()->obj()->findFuncByEntry(
                            callEdge->trg()->region(), callTarget);
                    assert(callee);
                    callee->set_retstatus(RETURN);
                    callee->_tamper = TAMPER_NONE;
                }
                elem->bundle()->add(new ParseWorkElem
                                            ( elem->bundle(),
                                              callEdge,
                                              callTarget,
                                              isResolvable,
                                              false ));
            }
        }
        // Find (or create) the frame responsible for the source block,
        // keyed by the block's last instruction address.
        ParseFrame *frame = _parse_data->findFrame
                ( src->region(),
                  src->lastInsnAddr() );
        bool isNewFrame = false;
        if (!frame) {
            vector<Function*> funcs;
            src->getFuncs(funcs);
            frame = new ParseFrame(*funcs.begin(),_parse_data);
            for (unsigned fix=1; fix < funcs.size(); fix++) {
                // if the block is shared, all of its funcs need
                // to add the new edge
                funcs[fix]->_cache_valid = false;
            }
            isNewFrame = true;
        }

        // push before frame init so no seed is added
        if (elem->bundle()) {
            frame->work_bundles.push_back(elem->bundle());
        }
        frame->pushWork(elem);
        if (isNewFrame) {
            init_frame(*frame);
        }

        // Queue each frame exactly once, regardless of how many elements
        // map to it.
        if (frameset.end() == frameset.find(frame)) {
            frameset.insert(frame);
            frames.insert(frame);
        }
    }
    ScopeLock<Mutex<true> > L(parse_mutex);
    // now parse
    if(_parse_state < PARTIAL)
        _parse_state = PARTIAL;
    _in_parse = true;

    parse_frames( frames, true );

    if(_parse_state > COMPLETE)
        _parse_state = COMPLETE;
    _in_parse = false;

    finalize();

}
433
434
// Parse a single frame unless another thread already owns it, and return
// the list of follow-on frames produced by postProcessFrame (NULL when
// there is no new work).  Frame ownership is mediated by the busy flag:
// swap_busy(true) returns the previous value, so a true result means the
// frame is being processed elsewhere and we skip it.
LockFreeQueueItem<ParseFrame*> *
Parser::ProcessOneFrame(ParseFrame* pf, bool recursive) {
  LockFreeQueueItem<ParseFrame*> *frame_list = 0;
  if (pf->func && !pf->swap_busy(true)) {
#ifdef ADD_PARSE_FRAME_TIMERS
    boost::timer::cpu_timer t;
    t.start();
#endif
    parse_frame(*pf,recursive);
#ifdef ADD_PARSE_FRAME_TIMERS
    t.stop();
    // Record wall-clock parse time (in ms buckets) for the CSV dump in
    // Parser::parse().
    unsigned int msecs = floor(t.elapsed().wall / 1000000.0);
    race_detector_fake_lock_acquire(race_detector_fake_lock(time_histogram));
    {
      tbb::concurrent_hash_map<unsigned int, unsigned int>::accessor a;
      time_histogram.insert(a, msecs);
      ++(a->second);
    }
    race_detector_fake_lock_release(race_detector_fake_lock(time_histogram));
#endif
    frame_list = postProcessFrame(pf, recursive);

    // exclusive access to each ParseFrame is mediated by marking a frame busy.
    // we clear evidence of our access to the ParseFrame here because a concurrent
    // thread may try to touch it because of duplicates in the work list. that
    // won't actually be concurrent because of swap_busy. we suppress the race
    // report by scrubbing information about our access.
    race_detector_forget_access_history(pf, sizeof(*pf));

    pf->swap_busy(false);
  }
  return frame_list;
}
468
// Dispatch on the status a frame reached after parse_frame() and build the
// list of frames that should run next:
//   CALL_BLOCKED  -> re-queue this frame plus a frame for the callee
//   PARSED        -> resume frames that were waiting on this function
//   FRAME_DELAYED -> record this frame as waiting on unresolved callees
//   FRAME_ERROR / PROGRESS -> nothing to enqueue
// Returns the queue's raw item list (steal()), or NULL if empty.
LockFreeQueueItem<ParseFrame *> *Parser::postProcessFrame(ParseFrame *pf, bool recursive) {
    LockFreeQueue<ParseFrame*> work;
    boost::lock_guard<ParseFrame> g(*pf);
    switch(pf->status()) {
        case ParseFrame::CALL_BLOCKED: {
            parsing_printf("[%s] frame %lx blocked at %lx\n",
                           FILE__,pf->func->addr(),pf->curAddr);
            {

                assert(pf->call_target);

                parsing_printf("    call target %lx\n",pf->call_target->addr());
                work.insert(pf);

                // Ensure the callee has a frame; create and record one if
                // this is the first time we've blocked on it.
                CodeRegion * cr = pf->call_target->region();
                Address targ = pf->call_target->addr();
                ParseFrame * tf = _parse_data->findFrame(cr,targ);
                if(!tf)
                {
                    // sanity
                    if(_parse_data->frameStatus(cr,targ) == ParseFrame::PARSED)
                        assert(0);

                    tf = new ParseFrame(pf->call_target,_parse_data);
                    init_frame(*tf);
                    frames.insert(tf);
                    _parse_data->record_frame(tf);
                }
                if(likely(recursive))
                    work.insert(tf);
                else {
                    assert(0);
                    // XXX should never get here
                    //parsing_printf("    recursive parsing disabled\n");
                }

            }
            break;
        }
        case ParseFrame::PARSED:{
            parsing_printf("[%s] frame %lx complete, return status: %d\n",
                           FILE__,pf->func->addr(),pf->func->retstatus());

            // Defensive mode: a function that tampers with its stack may
            // require CALL_FT target fixups or cross-object parsing.
            if (unlikely(_obj.defensiveMode() &&
                         TAMPER_NONE != pf->func->tampersStack() &&
                         TAMPER_NONZERO != pf->func->tampersStack() ))
            {   // may adjust CALL_FT targets, add a frame to the worklist,
                // or trigger parsing in a separate target CodeObject
                tamper_post_processing(work,pf);
            }

            /* add waiting frames back onto the worklist */
            resumeFrames(pf->func, work);

            pf->cleanup();
            break;
        }
        case ParseFrame::FRAME_ERROR:
            parsing_printf("[%s] frame %lx error at %lx\n",
                           FILE__,pf->func->addr(),pf->curAddr);
            break;
        case ParseFrame::FRAME_DELAYED:
        {
            // Register this frame in the global delayed map, keyed by each
            // callee whose return status it is waiting on.
            boost::lock_guard<DelayedFrames> g(delayed_frames);
            parsing_printf("[%s] frame %lx delayed at %lx\n",
                                                                                  FILE__,
                                                                                  pf->func->addr(),
                                                                                  pf->curAddr);

            if (pf->delayedWork.size()) {
                // Add frame to global delayed list

                for (auto iter = pf->delayedWork.begin();
                     iter != pf->delayedWork.end();
                     ++iter) {

                    Function * ct = iter->second;
                    parsing_printf("[%s] waiting on %s\n",
                                   __FILE__,
                                   ct->name().c_str());
                    {

                        auto fIter = delayed_frames.frames.find(ct);
                        if (fIter == delayed_frames.frames.end()) {
                            set<ParseFrame *> waiters;
                            waiters.insert(pf);
                            delayed_frames.frames[ct] = waiters;
                        } else {
                            delayed_frames.frames[ct].insert(pf);
                        }
                    }
                }
            } else {
                // We shouldn't get here
                assert(0 && "Delayed frame with no delayed work");
            }

            /* if the return status of this function has been updated, add
             * waiting frames back onto the work list */
            resumeFrames(pf->func, work);
            break;
        }
        case ParseFrame::PROGRESS:
            // another thread is working on this already; possibly something else unblocked.
            break;

        default:
            assert(0 && "invalid parse frame status");
    }
    return work.steal();
}
580
581
582 static void
583 InsertFrames
584 (
585  LockFreeQueueItem<ParseFrame *> *frames,
586  LockFreeQueue<ParseFrame *> *q
587 )
588 {
589   if (frames) {
590     LockFreeQueue<ParseFrame *> myq(frames);
591     q->splice(myq);
592   }
593 }
594
595
// Drain `frame_list` and launch one parsing task per frame — an OpenMP
// task or a Cilk spawn, depending on the build configuration selected at
// the top of this file.  Takes ownership of the queue items and deletes
// them as they are popped.
void
Parser::LaunchWork
(
 LockFreeQueueItem<ParseFrame*> *frame_list,
 bool recursive
)
{
  LockFreeQueue<ParseFrame *> private_queue(frame_list);
  for(;;) {
    LockFreeQueueItem<ParseFrame *> *first = private_queue.pop();
    if (first == 0) break;
    ParseFrame *frame = first->value();
    delete first;
#if USE_OPENMP
#pragma omp task firstprivate(frame, recursive)
    SpawnProcessFrame(frame, recursive);
#else
    cilk_spawn SpawnProcessFrame(frame, recursive);
#endif
  }
}
617
618
// Task body: parse one frame, then recursively launch tasks for any
// follow-on frames it produced (callees, resumed waiters).
void
Parser::SpawnProcessFrame
(
 ParseFrame *pf,
 bool recursive
)
{
  LockFreeQueueItem<ParseFrame*> *new_frames = ProcessOneFrame(pf, recursive);
  LaunchWork(new_frames, recursive);
}
629
630 void print_work_queue(LockFreeQueue<ParseFrame *> *work_queue)
631 {
632   LockFreeQueueItem<ParseFrame *> *current = work_queue->peek();
633
634   std::cout << "Work Queue" << std::endl;
635   while (current) {
636     std::cout << "  parse frame " << std::hex << current->value()
637               << std::dec << std::endl;
638     current = current->next();
639   }
640 }
641
642
// Run every frame on `work_queue` to completion.  Under OpenMP a parallel
// region is opened and the master thread seeds the task pool via
// LaunchWork; the implicit barrier at the end of the region waits for all
// spawned tasks.  Without OpenMP the queue is drained directly.
void
Parser::ProcessFrames
(
 LockFreeQueue<ParseFrame *> *work_queue,
 bool recursive
)
{
#if USE_OPENMP
#pragma omp parallel shared(work_queue)
  {
#pragma omp master
    LaunchWork(work_queue->steal(), recursive);
  }
#else
  LaunchWork(work_queue->steal(), recursive);
#endif
}
660
661
// Drive parsing to a fixed point.  After one pass over `work`, inspect the
// global delayed-frame map: empty means done; unchanged size since the
// previous pass means the remaining frames form a cyclic return-status
// dependency (processCycle); otherwise iterate again (processFixedPoint).
// Frame storage is reclaimed by cleanup_frames() at the end.
void
Parser::parse_frames(LockFreeQueue<ParseFrame *> &work, bool recursive)
{
    ProcessFrames(&work, recursive);

    bool done = false, cycle = false;
    {
        boost::lock_guard<DelayedFrames> g(delayed_frames);
        if(delayed_frames.frames.empty()) {
            parsing_printf("[%s] Fixed point reached (0 funcs with unknown return status)\n)",
                           __FILE__);
            delayed_frames.size = 0;
            done = true;
        } else if(delayed_frames.size == delayed_frames.frames.size()) {
            cycle = true;
        }
    }

    // Use fixed-point to ensure we parse frames whose parsing had to be delayed
    if (!done) {
        if(cycle) {
            processCycle(work,recursive);
        } else {
            processFixedPoint(work, recursive);
        }
    }

    cleanup_frames();
}
691
692 void Parser::processFixedPoint(LockFreeQueue<ParseFrame *> &work, bool recursive) {// We haven't yet reached a fixedpoint; let's recurse
693     {
694         boost::lock_guard<DelayedFrames> g(delayed_frames);
695
696         parsing_printf("[%s] Fixed point not yet reached (%d funcs with unknown return status)\n",
697                        __FILE__,
698                        delayed_frames.frames.size());
699
700         // Update delayed_frames.size for next iteration
701         delayed_frames.size = delayed_frames.frames.size();
702
703         // Check if we can resume any frames yet
704         vector<Function *> updated;
705         for (auto iter = delayed_frames.frames.begin();
706              iter != delayed_frames.frames.end();
707              ++iter) {
708             if (iter->first->retstatus() != UNSET) {
709                 updated.push_back(iter->first);
710             }
711         }
712
713         if (updated.size()) {
714             for (auto uIter = updated.begin();
715                  uIter != updated.end();
716                  ++uIter) {
717                 resumeFrames((*uIter), work);
718             }
719         }
720     }
721     // Recurse through parse_frames
722     parsing_printf("[%s] Calling parse_frames again... \n", __FILE__);
723
724     parse_frames(work, recursive);
725 }
726
727 void Parser::processCycle(LockFreeQueue<ParseFrame *> &work, bool recursive) {// If we've reached a fixedpoint and have remaining frames, we must
728     // have a cyclic dependency
729     vector<Function *> updated;
730     {
731         boost::lock_guard<DelayedFrames> g(delayed_frames);
732         parsing_printf("[%s] Fixed point reached (%d funcs with unknown return status)\n",
733                        __FILE__,
734                        delayed_frames.frames.size());
735
736         // Mark UNSET functions in cycle as NORETURN
737         // except if we're doing non-recursive parsing.
738         // If we're just parsing one function, we want
739         // to mark everything RETURN instead.
740         for (auto iter = delayed_frames.frames.begin(); iter != delayed_frames.frames.end(); ++iter) {
741             Function * func = iter->first;
742             if (func->retstatus() == UNSET) {
743                 if(recursive)
744                 {
745                     func->set_retstatus(NORETURN);
746                     func->obj()->cs()->incrementCounter(PARSE_NORETURN_HEURISTIC);
747                 }
748                 else
749                 {
750                     func->set_retstatus(RETURN);
751                 }
752                 updated.push_back(func);
753             }
754
755             set<ParseFrame *> vec = iter->second;
756             for (auto vIter = vec.begin(); vIter != vec.end(); ++vIter) {
757                 Function * delayed = (*vIter)->func;
758                 if (delayed->retstatus() == UNSET) {
759                     delayed->set_retstatus(NORETURN);
760                     delayed->obj()->cs()->incrementCounter(PARSE_NORETURN_HEURISTIC);
761                     updated.push_back(func);
762                 }
763             }
764         }
765
766     }
767
768     // We should have updated the return status of one or more frames; recurse
769     if (updated.size()) {
770         for (auto uIter = updated.begin();
771              uIter != updated.end();
772              ++uIter) {
773             resumeFrames((*uIter), work);
774         }
775
776         if (work.peek()) {
777             parsing_printf("[%s] Updated retstatus of delayed frames, trying again...\n", __FILE__);
778             parse_frames(work, recursive);
779         }
780     } else {
781         // We shouldn't get here
782         parsing_printf("[%s] No more work can be done (%d funcs with unknown return status)\n",
783                        __FILE__,
784                        delayed_frames.frames.size());
785         assert(0);
786     }
787 }
788
789 void Parser::cleanup_frames()  {
790   vector <ParseFrame *> pfv;
791   std::copy(frames.begin(), frames.end(), std::back_inserter(pfv));
792 #if USE_OPENMP
793 #pragma omp parallel for schedule(auto)
794   for (unsigned int i = 0; i < pfv.size(); i++) {
795     ParseFrame *pf = pfv[i];
796     if (pf) {
797       _parse_data->remove_frame(pf);
798       delete pf;
799     }
800   }
801   frames.clear();
802 #elif USE_CILK
803   cilk_for(unsigned i=0; i < pfv.size(); ++i) {
804     ParseFrame *pf = pfv[i];
805     if (pf) {
806       _parse_data->remove_frame(pf);
807       delete pf;
808     }
809   }
810 #else
811   for(unsigned i=0; i < pfv.size(); ++i) {
812     ParseFrame *pf = pfv[i];
813     if (pf) {
814       _parse_data->remove_frame(pf);
815       delete pf;
816     }
817   }
818 #endif
819   frames.clear();
820 }
821
822 /* Finalizing all functions for consumption:
823   
824    - Finish delayed parsing
825    - Prepare and record FuncExtents for range-based lookup
826 */
827
828 // Could set cache_valid depending on whether the function is currently
829 // being parsed somewhere. 
830
// Finalize one block on behalf of `owner`: reconcile it against any
// previously recorded block at the same address (splitting when the
// address falls mid-block but on a consistent instruction boundary),
// sanity-check fallthrough-style edges, and notify the callback manager.
void Parser::finalize_block(Block* b, Function* owner) {
    Address addr = b->start();
    Address prev_insn = b->lastInsnAddr();
    set<Block *> overlap;
    CodeRegion *cr;
    if (owner->region()->contains(addr))
        cr = owner->region();
    else
        cr = _parse_data->reglookup(owner->region(), addr);

    Block *exist = _parse_data->findBlock(cr, addr);
    if (NULL == exist) {
        _parse_data->findBlocks(cr, addr, overlap);
        if (overlap.size() > 1)
            parsing_printf("[%s] address %lx overlapped by %d blocks\n",
                           FILE__, addr, overlap.size());

        /* Platform specific consistency test:
           generally checking for whether the address is an
           instruction boundary in a block */
        for (set<Block *>::iterator sit = overlap.begin(); sit != overlap.end(); ++sit) {
            Block *ob = *sit;

            // consistent will fill in prev_insn with the address of the
            // instruction preceeding addr if addr is consistent
            if (ob->consistent(addr, prev_insn)) {
                exist = b;
                break;
            } else {
                parsing_printf("[%s] %lx is inconsistent with [%lx,%lx)\n",
                               FILE__, addr, b->start(), b->end());
                _pcb.overlapping_blocks(ob, b);
            }
        }

    }
    if (exist) {
        record_block(exist);
        if (exist->start() != addr) {
            // The recorded block starts earlier than `addr`: split it at
            // `addr` and register the new trailing block with the owner's
            // parse frame so later passes treat it as visited.
            Block *ret = split_block(owner, b, addr, prev_insn);
            record_block(ret);
            
            ParseFrame * tf = _parse_data->findFrame(cr,owner->addr());
            tf->leadersToBlock[ret->start()] = ret;
            tf->visited[ret->start()] = true;

        }
    }
    // Sanity check: every fallthrough-style edge must target a block that
    // begins exactly at this block's end (or later).
    for (auto e = b->targets().begin();
         e != b->targets().end();
         ++e)
    {
        auto t = (*e)->type();
        if(t == CALL_FT ||
           t == COND_NOT_TAKEN ||
           t == FALLTHROUGH)
        {
            auto off = (*e)->_target_off;
            assert(off >= b->end());
            assert((*e)->trg()->start() == off);
        }
    }
    _pcb.addBlock(owner, b);

}
896
897 void
898 Parser::finalize(Function *f)
899 {
900     if(f->_cache_valid) {
901         return;
902     }
903
904     if(!f->_parsed) {
905         parsing_printf("[%s:%d] Parser::finalize(f[%lx]) "
906                                "forced parsing\n",
907                        FILE__,__LINE__,f->addr());
908         parse_at(f->region(), f->addr(), true, f->src());
909     }
910
911     bool cache_value = true;
912     boost::unique_lock<boost::mutex> finalize_write_lock(finalize_mutex);
913     /* this is commented out to prevent a failure in tampersStack, but
914            this may be an incorrect approach to fixing the problem.
915     if(frame_status(f->region(), f->addr()) < ParseFrame::PARSED) {
916         // XXX prevent caching of blocks, extents for functions that
917         // are actively being parsed. This prevents callbacks and other
918         // functions called from within, e.g. parse_frame from setting
919         // the caching flag and preventing later updates to the blocks()
920         // vector during finalization.
921         cache_value = false;
922     }*/
923
924     parsing_printf("[%s] finalizing %s (%lx)\n",
925                    FILE__,f->name().c_str(),f->addr());
926
927     region_data * rd = _parse_data->findRegion(f->region());
928     assert(rd);
929
930     // finish delayed parsing and sorting
931     Function::blocklist blocks = f->blocks_int();
932
933     // is this the first time we've parsed this function?
934     if (unlikely( !f->_extents.empty() )) {
935         _parse_data->remove_extents(f->_extents);
936         f->_extents.clear();
937     }
938
939
940     if(blocks.empty()) {
941         f->_cache_valid = cache_value; // see above
942         return;
943     }
944
945     auto bit = blocks.begin();
946     FuncExtent * ext = NULL;
947     Address ext_s = (*bit)->start();
948     Address ext_e = ext_s;
949
950     for( ; bit != blocks.end(); ++bit) {
951         Block * b = *bit;
952         finalize_block(b, f);
953         if(b->start() > ext_e) {
954             ext = new FuncExtent(f,ext_s,ext_e);
955
956             // remove access history for ext before publishing it
957             // to concurrent readers to avoid false race reports.
958             // ext is written before it is published and only read
959             // thereafter.
960             race_detector_forget_access_history(ext, sizeof(*ext));
961
962             parsing_printf("%lx extent [%lx,%lx)\n",f->addr(),ext_s,ext_e);
963             f->_extents.push_back(ext);
964             rd->funcsByRange.insert(ext);
965             ext_s = b->start();
966         }
967         ext_e = b->end();
968     }
969     ext = new FuncExtent(f,ext_s,ext_e);
970
971     // remove access history for ext before publishing it
972     // to concurrent readers to avoid false race reports.
973     // ext is written before it is published and only read
974     // thereafter.
975     race_detector_forget_access_history(ext, sizeof(*ext));
976
977     parsing_printf("%lx extent [%lx,%lx)\n",f->addr(),ext_s,ext_e);
978     rd->funcsByRange.insert(ext);
979     f->_extents.push_back(ext);
980
981     f->_cache_valid = cache_value; // see comment at function entry
982
983     if (unlikely( f->obj()->defensiveMode())) {
984         // add fallthrough edges for calls assumed not to be returning
985         // whose fallthrough blocks we parsed anyway (this happens if 
986         // the FT block is also a branch target)
987         Function::edgelist & edges = f->_call_edge_list;
988         for (Function::edgelist::iterator eit = edges.begin();
989              eit != edges.end();
990              eit++)
991         {
992             boost::lock_guard<Block> g(*(*eit)->src());
993             if (2 > (*eit)->src()->targets().size()) {
994                 Block *ft = _parse_data->findBlock((*eit)->src()->region(),
995                                                    (*eit)->src()->end());
996                 if (ft && HASHDEF(f->_bmap,ft->start())) {
997                     link((*eit)->src(),ft,CALL_FT,false);
998                 }
999             }
1000         }
1001     }
1002 }
1003
1004 void
1005 Parser::finalize()
1006 {
1007     if(_parse_state < FINALIZED) {
1008         finalize_funcs(hint_funcs);
1009         finalize_funcs(discover_funcs);
1010         _parse_state = FINALIZED;
1011     }
1012 }
1013
1014 void
1015 Parser::finalize_funcs(vector<Function *> &funcs)
1016 {
1017     vector<Function *>::iterator fit = funcs.begin();
1018     for( ; fit != funcs.end(); ++fit) {
1019         finalize(*fit);
1020     }
1021 }
1022
1023 void
1024 Parser::record_func(Function *f)
1025 {
1026     if(!f) return;
1027
1028     if(f->src() == HINT)
1029         hint_funcs.push_back(f);
1030     else
1031         discover_funcs.push_back(f);
1032
1033     sorted_funcs.insert(f);
1034
1035     _parse_data->record_func(f);
1036 }
1037
void
Parser::init_frame(ParseFrame & frame)
{
    // Prepare a parse frame for work: ensure the function has an entry
    // block and a seed work element, then record a couple of
    // entry-instruction properties (stack-frame preamble, FP save)
    // on the function.
    boost::lock_guard<ParseFrame> g(frame);
    Block * b = NULL;
    Block * split = NULL;

    // Only set up an entry block if the function doesn't have one yet.
    if ( ! frame.func->_entry )
    {
        // Find or create a block
        b = block_at(frame, frame.func, frame.func->addr(),split);
        if(b) {
            // Register the entry block and seed the worklist with the
            // function's entry address.
            frame.leadersToBlock[frame.func->addr()] = b;
            frame.func->_entry = b;
            frame.seed = new ParseWorkElem(NULL,NULL,frame.func->addr(),true,false);
            frame.pushWork(frame.seed);
        } else {
            // No block could be produced at the entry address; the frame
            // stays uninitialized (no seed work pushed).
            parsing_printf("[%s] failed to initialize parsing frame\n",
                           FILE__);
            return;
        }
        // block_at may have split an existing block; notify the callback
        // after the entry block is established.
        if (split) {
            _pcb.splitBlock(split,b);
        }
    }

    // FIXME these operations should move into the actual parsing
    // Decode starting at the function's entry; the buffer covers from
    // the entry address to the end of the code region.
    Address ia_start = frame.func->addr();
    unsigned size =
            frame.codereg->offset() + frame.codereg->length() - ia_start;
    const unsigned char* bufferBegin =
            (const unsigned char *)(frame.func->isrc()->getPtrToInstruction(ia_start));
    InstructionDecoder dec(bufferBegin,size,frame.codereg->getArch());
    // NOTE(review): b may be NULL here if the function already had an
    // entry block (the if-branch above was skipped) — presumably the
    // adapter tolerates that; confirm against makePlatformIA_IAPI.
    InstructionAdapter_t* ah = InstructionAdapter_t::makePlatformIA_IAPI(obj().cs()->getArch(),
                                                                         dec, ia_start, frame.func->obj(),
                                                                         frame.codereg, frame.func->isrc(), b);
    // Record entry-instruction properties on the function.
    if(ah->isStackFramePreamble()) {
        frame.func->_no_stack_frame = false;
    }
    frame.func->_saves_fp = ah->savesFP();
    delete ah;
}
1080
1081 void ParseFrame::cleanup()
1082 {
1083     boost::lock_guard<ParseFrame> g(*this);
1084     for(unsigned i=0;i<work_bundles.size();++i)
1085         delete work_bundles[i];
1086     work_bundles.clear();
1087     if(seed)
1088         delete seed;
1089     seed = NULL;
1090 }
1091
1092 namespace {
1093     inline Edge * bundle_call_edge(ParseWorkBundle * b)
1094     {
1095         if(!b) return NULL;
1096
1097         vector<ParseWorkElem*> const& elems = b->elems();
1098         vector<ParseWorkElem*>::const_iterator it = elems.begin();
1099         for( ; it != elems.end(); ++it) {
1100             if((*it)->edge()->type() == CALL)
1101                 return (*it)->edge();
1102         }
1103         return NULL;
1104     }
1105
1106     /* 
1107      * Look up the next block for detection of straight-line
1108      * fallthrough edges into existing blocks.
1109      */
1110 }
1111
1112 void
1113 Parser::parse_frame(ParseFrame & frame, bool recursive) {
1114     boost::lock_guard<Function> g(*frame.func);
1115     /** Persistent intermediate state **/
1116     InstructionAdapter_t *ahPtr = NULL;
1117     ParseFrame::worklist_t & worklist = frame.worklist;
1118     dyn_hash_map<Address, Block *> & leadersToBlock = frame.leadersToBlock;
1119     Address & curAddr = frame.curAddr;
1120     Function * func = frame.func;
1121     dyn_hash_map<Address, bool> & visited = frame.visited;
1122     unsigned & num_insns = frame.num_insns;
1123     func->_cache_valid = false;
1124
1125     /** Non-persistent intermediate state **/
1126     Address nextBlockAddr;
1127     Block * nextBlock;
1128
1129     if (frame.status() == ParseFrame::UNPARSED) {
1130         parsing_printf("[%s] ==== starting to parse frame %lx ====\n",
1131                        FILE__,frame.func->addr());
1132         // prevents recursion of parsing
1133         frame.func->_parsed = true;
1134     } else {
1135         parsing_printf("[%s] ==== resuming parse of frame %lx ====\n",
1136                        FILE__,frame.func->addr());
1137         // Pull work that can be resumed off the delayedWork list
1138         std::map<ParseWorkElem *, Function *>::iterator iter;
1139
1140         vector<ParseWorkElem *> updated;
1141         vector<ParseWorkElem *>::iterator uIter;
1142
1143         for (iter = frame.delayedWork.begin();
1144              iter != frame.delayedWork.end();
1145              ++iter) {
1146             if (iter->second->retstatus() != UNSET) {
1147                 frame.pushWork(iter->first);
1148                 updated.push_back(iter->first);
1149             }
1150         }
1151         for (uIter = updated.begin(); uIter != updated.end(); ++uIter) {
1152             frame.delayedWork.erase(*uIter);
1153         }
1154     }
1155
1156
1157     frame.set_status(ParseFrame::PROGRESS);
1158
1159     while(!worklist.empty()) {
1160
1161         Block * cur = NULL;
1162         ParseWorkElem * work = frame.popWork();
1163         if (work->order() == ParseWorkElem::call) {
1164             parsing_printf("Handling call work element\n");
1165             Function * ct = NULL;
1166             
1167             // If we're not doing recursive traversal, skip *all* of the call edge processing.
1168             // We don't want to create a stub. We'll handle the assumption of a fallthrough
1169             // target for the fallthrough work element, not the call work element. Catch blocks
1170             // will get excluded but that's okay; our heuristic is not reliable enough that I'm willing to deploy
1171             // it when we'd only be detecting a non-returning callee by name anyway.
1172             // --BW 12/2012
1173             if (!recursive) {
1174                 parsing_printf("[%s] non-recursive parse skipping call %lx->%lx\n",
1175                                FILE__, work->edge()->src()->lastInsnAddr(), work->target());
1176                 continue;
1177             }
1178
1179             ct = _parse_data->findFunc(frame.codereg, work->target());
1180             if (ct == NULL) {
1181                 FuncSource how = RT;
1182                 if(frame.func->_src == GAP || frame.func->_src == GAPRT)
1183                     how = GAPRT;
1184                 
1185                 ct = _parse_data->get_func(frame.codereg,work->target(),how);
1186                 _pcb.discover_function(ct);
1187             }
1188             bool frame_parsing_not_started = 
1189                 (frame_status(ct->region(),ct->addr())==ParseFrame::UNPARSED ||
1190                  frame_status(ct->region(),ct->addr())==ParseFrame::BAD_LOOKUP);
1191             parsing_printf("\tframe %lx, UNPARSED: %d, BAD_LOOKUP %d\n", ct->addr(), frame_status(ct->region(),ct->addr())==ParseFrame::UNPARSED,frame_status(ct->region(),ct->addr())==ParseFrame::BAD_LOOKUP);
1192
1193             if (!frame_parsing_not_started && !work->callproc()) {
1194                 parsing_printf("[%s] binding call (call target should have been parsed) %lx->%lx\n",
1195                         FILE__,work->edge()->src()->lastInsnAddr(),work->target());
1196                 pair<Function*,Edge*> ctp =
1197                         bind_call(frame,
1198                                   work->target(),
1199                                   work->edge()->src(),
1200                                   work->edge());
1201                 work->mark_call();
1202             }
1203
1204             if (recursive && ct && frame_parsing_not_started) {
1205                 // suspend this frame and parse the next
1206                 parsing_printf("    [suspend frame %lx]\n", func->addr());
1207                 frame.call_target = ct;
1208                 frame.set_status(ParseFrame::CALL_BLOCKED);
1209                 // need to re-visit this edge
1210                 frame.pushWork(work);
1211                 if (ahPtr) delete ahPtr;
1212                 return;
1213             } else if (ct && work->tailcall()) {
1214                 // XXX The target has been or is currently being parsed (else
1215                 //     the previous conditional would have been taken),
1216                 //     so if its return status is unset then this
1217                 //     function has to take UNKNOWN
1218                 if (func->retstatus() != RETURN) {
1219                     if (ct->retstatus() > NORETURN)
1220                       func->set_retstatus(ct->retstatus());
1221                     else if (ct->retstatus() == UNSET)
1222                       frame.pushDelayedWork(work, ct);
1223                 }
1224             }
1225
1226             continue;
1227         } else if (work->order() == ParseWorkElem::call_fallthrough) {
1228             // check associated call edge's return status
1229             Edge * ce = bundle_call_edge(work->bundle());
1230             if (!ce) {
1231                 // odd; no call edge in this bundle
1232                 parsing_printf("[%s] unexpected missing call edge at %lx\n",
1233                                FILE__,work->edge()->src()->lastInsnAddr());
1234             } else {
1235                 // check for system call FT
1236                 Edge* edge = work->edge();
1237                 Block::Insns blockInsns;
1238 //                boost::lock_guard<Block> src_guard(*edge->src());
1239                 edge->src()->getInsns(blockInsns);
1240                 auto prev = blockInsns.rbegin();
1241                 InstructionAPI::Instruction prevInsn = prev->second;
1242                 bool is_nonret = false;
1243
1244                 if (prevInsn.getOperation().getID() == e_syscall) {
1245                     Address src = edge->src()->lastInsnAddr();
1246
1247
1248                     // Need to determine if system call is non-returning
1249                     long int syscallNumber;
1250                     if (!getSyscallNumber(func, edge->src(), src, frame.codereg->getArch(), syscallNumber)) {
1251                         // If we cannot retrieve a syscall number, assume the syscall returns
1252                         parsing_printf("[%s] could not retrieve syscall edge, assuming returns\n", FILE__);
1253                     } else {
1254                         if (obj().cs()->nonReturningSyscall(syscallNumber)) {
1255                             is_nonret = true;
1256                         }
1257                     }
1258
1259                     if (is_nonret) {
1260                         parsing_printf("[%s] no fallthrough for non-returning syscall\n",
1261                                        FILE__,
1262                                        work->edge()->src()->lastInsnAddr());
1263
1264                         // unlink tempsink fallthrough edge
1265                         Edge * remove = work->edge();
1266                         remove->src()->removeTarget(remove);
1267                         factory().destroy_edge(remove, destroyed_noreturn);
1268                         continue;
1269                     }
1270                 } else if (ce->trg()) {
1271                     Address target = ce->trg_addr();
1272                     Function * ct = _parse_data->findFunc(frame.codereg,target);
1273                     bool is_plt = false;
1274
1275                     // check if associated call edge's return status is still unknown
1276                     if (ct && (ct->retstatus() == UNSET) ) {
1277                         // Delay parsing until we've finished the corresponding call edge
1278                         parsing_printf("[%s] Parsing FT edge %lx, corresponding callee (%s) return status unknown; delaying work\n",
1279                                        __FILE__,
1280                                        work->edge()->src()->lastInsnAddr(),
1281                                        ct->name().c_str());
1282
1283                         // Add work to ParseFrame's delayed list
1284                         frame.pushDelayedWork(work, ct);
1285
1286                         // Continue other work for this frame
1287                         continue;
1288                     }
1289
1290                     is_plt = HASHDEF(plt_entries,target);
1291
1292                     // CodeSource-defined tests 
1293                     is_nonret = obj().cs()->nonReturning(target);
1294                     if (is_nonret) {
1295                         parsing_printf("\t Disallowing FT edge: CodeSource reports nonreturning\n");
1296                     }
1297                     if (!is_nonret && is_plt) {
1298                         is_nonret |= obj().cs()->nonReturning(plt_entries[target]);
1299                         if (is_nonret) {
1300                             parsing_printf("\t Disallowing FT edge: CodeSource reports PLT nonreturning\n");
1301                         }
1302                     }
1303                     // Parsed return status tests
1304                     if (!is_nonret && !is_plt && ct) {
1305                         is_nonret |= (ct->retstatus() == NORETURN);
1306                         if (is_nonret) {
1307                             parsing_printf("\t Disallowing FT edge: function is non-returning\n");
1308                         }
1309                     }
1310                     // Call-stack tampering tests
1311                     if (unlikely(!is_nonret && frame.func->obj()->defensiveMode() && ct)) {
1312                         is_nonret |= (ct->retstatus() == UNKNOWN);
1313                         if (is_nonret) {
1314                             parsing_printf("\t Disallowing FT edge: function in "
1315                                                    "defensive binary may not return\n");
1316                             mal_printf("Disallowing FT edge: function %lx in "
1317                                                "defensive binary may not return\n", ct->addr());
1318                         } else {
1319                             StackTamper ct_tamper = ct->tampersStack();
1320                             is_nonret |= (TAMPER_NONZERO == ct_tamper);
1321                             is_nonret |= (TAMPER_ABS == ct_tamper);
1322                             if (is_nonret) {
1323                                 mal_printf("Disallowing FT edge: function at %lx "
1324                                                    "tampers with its stack\n", ct->addr());
1325                                 parsing_printf("\t Disallowing FT edge: function "
1326                                                        "tampers with its stack\n");
1327                             }
1328                         }
1329                     }
1330                     if (is_nonret) {
1331                         parsing_printf("[%s] no fallthrough for non-returning call "
1332                                                "to %lx at %lx\n",FILE__,target,
1333                                        work->edge()->src()->lastInsnAddr());
1334
1335                         // unlink tempsink fallthrough edge
1336                         Edge * remove = work->edge();
1337                         remove->src()->removeTarget(remove);
1338                         factory().destroy_edge(remove, destroyed_noreturn);
1339                         continue;
1340                     } else
1341                         // Invalidate cache_valid for all sharing functions
1342                         invalidateContainingFuncs(func, ce->src());
1343                 }
1344             }
1345         } else if (work->order() == ParseWorkElem::seed_addr) {
1346             cur = leadersToBlock[work->target()];
1347         } else if (work->order() == ParseWorkElem::resolve_jump_table) {
1348             // resume to resolve jump table
1349             auto work_ah = work->ah();
1350             parsing_printf("... continue parse indirect jump at %lx\n", work_ah->getAddr());
1351             Block *nextBlock = work->cur();
1352             if (nextBlock->last() != work_ah->getAddr()) {
1353                 // The block has been split
1354                 region_data * rd = _parse_data->findRegion(frame.codereg);
1355                 set<Block*> blocks;
1356                 rd->blocksByRange.find(work_ah->getAddr(), blocks);
1357                 for (auto bit = blocks.begin(); bit != blocks.end(); ++bit) {
1358                     if ((*bit)->last() == work_ah->getAddr()) {
1359                         nextBlock = (*bit);
1360                         break;
1361                     }
1362                 }
1363
1364             }
1365             ProcessCFInsn(frame,nextBlock,work->ah());
1366             continue;
1367         }
1368             // call fallthrough case where we have already checked that
1369             // the target returns. this is used in defensive mode.
1370         else if (work->order() == ParseWorkElem::checked_call_ft) {
1371             Edge* ce = bundle_call_edge(work->bundle());
1372             if (ce != NULL) {
1373                 invalidateContainingFuncs(func, ce->src());
1374             } else {
1375                 parsing_printf("[%s] unexpected missing call edge at %lx\n",
1376                                FILE__,work->edge()->src()->lastInsnAddr());
1377             }
1378         }
1379
1380         if (NULL == cur) {
1381             pair<Block*,Edge*> newedge =
1382                     add_edge(frame,
1383                              frame.func,
1384                              work->edge()->src(),
1385                              work->target(),
1386                              work->edge()->type(),
1387                              work->edge());
1388             cur = newedge.first;
1389         }
1390
1391         if (HASHDEF(visited,cur->start()))
1392         {
1393             parsing_printf("[%s] skipping locally parsed target at %lx\n",
1394                            FILE__,work->target());
1395             continue;
1396         }
1397         visited[cur->start()] = true;
1398         leadersToBlock[cur->start()] = cur;
1399
1400         if (!cur->_parsed)
1401         {
1402             parsing_printf("[%s] parsing block %lx\n",
1403                            FILE__,cur->start());
1404             if (frame.func->obj()->defensiveMode()) {
1405                 mal_printf("new block at %lx (0x%lx)\n",cur->start(), cur);
1406             }
1407
1408             cur->_parsed = true;
1409             curAddr = cur->start();
1410         } else {
1411             parsing_printf("[%s] deferring parse of shared block %lx\n",
1412                            FILE__,cur->start());
1413             if (func->retstatus() < UNKNOWN) {
1414                 // we've parsed into another function, if we've parsed
1415                 // into it's entry point, set retstatus to match it
1416                 Function * other_func = _parse_data->findFunc(
1417                         func->region(), cur->start());
1418                 if (other_func && other_func->retstatus() > UNKNOWN) {
1419                     func->set_retstatus(other_func->retstatus());
1420                 } else {
1421                     func->set_retstatus(UNKNOWN);
1422                 }
1423             }
1424             // The edge to this shared block is changed from
1425             // "going to sink" to going to this shared block.
1426             // This changes the function boundary, so we need to
1427             // invalidate the cache.
1428             func->_cache_valid = false;
1429             continue;
1430         }
1431
1432         /*
1433          * External consumers of parsing may have invoked finalizing
1434          * methods from callback routines. Ensure that the caches
1435          * are invalidated because a new block is being added to the
1436          * function's view
1437          */
1438         func->_cache_valid = false;
1439
1440         // NB Using block's region() here because it may differ from the
1441         //    function's if control flow has jumped to another region
1442         unsigned size =
1443                 cur->region()->offset() + cur->region()->length() - curAddr;
1444         const unsigned char* bufferBegin =
1445                 (const unsigned char *)(func->isrc()->getPtrToInstruction(curAddr));
1446         InstructionDecoder dec(bufferBegin,size,frame.codereg->getArch());
1447
1448         if (!ahPtr)
1449             ahPtr = InstructionAdapter_t::makePlatformIA_IAPI(func->obj()->cs()->getArch(), dec, curAddr, func->obj(),
1450                                                               cur->region(), func->isrc(), cur);
1451         else
1452             ahPtr->reset(dec,curAddr,func->obj(),
1453                          cur->region(), func->isrc(), cur);
1454
1455         InstructionAdapter_t * ah = ahPtr;
1456         
1457         nextBlockAddr = std::numeric_limits<Address>::max();
1458         for (auto nextBlockIter = frame.leadersToBlock.begin(); 
1459                 nextBlockIter != frame.leadersToBlock.end();
1460                 ++nextBlockIter)
1461             if (nextBlockIter->first > frame.curAddr && nextBlockIter->first < nextBlockAddr) {
1462                 nextBlockAddr = nextBlockIter->first;
1463                 nextBlock = nextBlockIter->second;
1464             }
1465         bool isNopBlock = ah->isNop();
1466
1467         while(true) {
1468             curAddr = ah->getAddr();
1469             /** Check for straight-line fallthrough **/
1470             if (curAddr == nextBlockAddr) {
1471                 parsing_printf("[%s] straight-line parse into block at %lx\n",
1472                                FILE__,curAddr);
1473                 if (frame.func->obj()->defensiveMode()) {
1474                     mal_printf("straight-line parse into block at %lx\n",curAddr);
1475                 }
1476                 ah->retreat();
1477                 curAddr = ah->getAddr();
1478
1479                 end_block(cur,ahPtr);
1480                 pair<Block*,Edge*> newedge =
1481                         add_edge(frame,frame.func,cur,
1482                                  nextBlockAddr,FALLTHROUGH,NULL);
1483
1484                 if (!HASHDEF(visited,nextBlockAddr) &&
1485                     !HASHDEF(leadersToBlock,nextBlockAddr)) {
1486                     parsing_printf("[%s:%d] pushing %lx onto worklist\n",
1487                                    FILE__,__LINE__,nextBlockAddr);
1488
1489                     frame.pushWork(
1490                             frame.mkWork(
1491                                     NULL,
1492                                     newedge.second,
1493                                     nextBlockAddr,
1494                                     true,
1495                                     false)
1496                     );
1497                     /* preserved as example of leaky code; use mkWork instead
1498                     frame.pushWork(
1499                         new ParseWorkElem(
1500                             NULL,
1501                             newedge.second,
1502                             nextBlockAddr,
1503                             true,
1504                             false)
1505                         );
1506                     */
1507                     leadersToBlock[nextBlockAddr] = nextBlock;
1508                 }
1509                 break;
1510             } else if (curAddr > nextBlockAddr) {
1511                 parsing_printf("[%s:%d] inconsistent instruction stream: "
1512                                        "%lx is within [%lx,%lx)\n",
1513                                FILE__,__LINE__,curAddr,
1514                                nextBlock->start(),nextBlock->end());
1515                 Address prev_insn;
1516                 if (nextBlock->consistent(curAddr, prev_insn)) {
1517                     // The two overlapping blocks aligned.
1518                     // We need to split the large block, and create new edge to the later block
1519                     Block* new_block = split_block(frame.func, nextBlock, curAddr, prev_insn);
1520                     leadersToBlock[curAddr] = new_block;
1521                     visited[curAddr] = true;
1522                     ah->retreat();
1523                     end_block(cur, ah);
1524
1525                     add_edge(frame, frame.func, cur, curAddr, FALLTHROUGH, NULL);
1526
1527
1528
1529                     // We break from this loop because no need more stright-line parsing
1530                     break;
1531                 }
1532
1533                 // NB "cur" hasn't ended, so its range may
1534                 // not look like it overlaps with nextBlock
1535                 _pcb.overlapping_blocks(cur,nextBlock);
1536
1537                 tie(nextBlockAddr,nextBlock) =
1538                         func->get_next_block(frame.curAddr, frame.codereg);
1539             }
1540
1541             // per-instruction callback notification
1542             ParseCallback::insn_details insn_det;
1543             insn_det.insn = ah;
1544
1545             parsing_printf("[%s:%d] curAddr 0x%lx: %s \n",
1546                            FILE__,__LINE__,curAddr, insn_det.insn->getInstruction().format().c_str() );
1547
1548             if (func->_is_leaf_function) {
1549                 Address ret_addr;
1550                 func->_is_leaf_function = !(insn_det.insn->isReturnAddrSave(ret_addr));
1551                 parsing_printf("[%s:%d] leaf %d funcname %s \n",
1552                                FILE__,__LINE__,func->_is_leaf_function, func->name().c_str());
1553                 if (!func->_is_leaf_function) func->_ret_addr = ret_addr;
1554             }
1555
1556             _pcb.instruction_cb(func,cur,curAddr,&insn_det);
1557
1558             if (isNopBlock && !ah->isNop()) {
1559                 ah->retreat();
1560
1561                 end_block(cur,ahPtr);
1562                 pair<Block*,Edge*> newedge =
1563                         add_edge(frame,frame.func,cur,curAddr,FALLTHROUGH,NULL);
1564                 Block * targ = newedge.first;
1565
1566                 parsing_printf("[%s:%d] nop-block ended at %lx\n",
1567                                FILE__,__LINE__,curAddr);
1568                 if (targ && !HASHDEF(visited,targ->start())) {
1569                     parsing_printf("[%s:%d] pushing %lx onto worklist\n",
1570                                    FILE__,__LINE__,targ->start());
1571
1572                     frame.pushWork(
1573                             frame.mkWork(
1574                                     NULL,
1575                                     newedge.second,
1576                                     targ->start(),
1577                                     true,
1578                                     false)
1579                     );
1580                     leadersToBlock[targ->start()] = targ;
1581                 }
1582                 break;
1583             }
1584
1585             /** Particular instruction handling (calls, branches, etc) **/
1586             ++num_insns;
1587
1588             if(ah->hasCFT()) {
1589                 if (ah->isIndirectJump()) {
1590                     // Create a work element to represent that
1591                     // we will resolve the jump table later
1592                     end_block(cur,ahPtr);
1593                     frame.pushWork( frame.mkWork( work->bundle(), cur, ahPtr));
1594                 } else {
1595                     ProcessCFInsn(frame,cur,ahPtr);
1596                 }
1597                 break;
1598             } else if (func->_saves_fp &&
1599                        func->_no_stack_frame &&
1600                        ah->isFrameSetupInsn()) { // isframeSetup is expensive
1601                 func->_no_stack_frame = false;
1602             } else if (ah->isLeave()) {
1603                 func->_no_stack_frame = false;
1604             } else if ( ah->isAbort() ) {
1605                 // 4. `abort-causing' instructions
1606                 end_block(cur,ahPtr);
1607                 //link(cur, sink_block, DIRECT, true);
1608                 break;
1609             } else if ( ah->isInvalidInsn() ) {
1610                 // 4. Invalid or `abort-causing' instructions
1611                 end_block(cur,ahPtr);
1612                 link(cur, _sink, DIRECT, true);
1613                 break;
1614             } else if ( ah->isInterruptOrSyscall() ) {
1615                 // 5. Raising instructions
1616                 end_block(cur,ahPtr);
1617
1618                 pair<Block*,Edge*> newedge =
1619                         add_edge(frame,frame.func,cur,ah->getNextAddr(),FALLTHROUGH,NULL);
1620                 Block * targ = newedge.first;
1621
1622                 if (targ && !HASHDEF(visited,targ->start()) &&
1623                     !HASHDEF(leadersToBlock,targ->start())) {
1624                     parsing_printf("[%s:%d] pushing %lx onto worklist\n",
1625                                    FILE__,__LINE__,targ->start());
1626
1627                     frame.pushWork(
1628                             frame.mkWork(
1629                                     NULL,
1630                                     newedge.second,
1631                                     targ->start(),
1632                                     true,
1633                                     false)
1634                     );
1635                     leadersToBlock[targ->start()] = targ;
1636                 }
1637                 if (unlikely(func->obj()->defensiveMode())) {
1638                     fprintf(stderr,"parsed bluepill insn sysenter or syscall "
1639                             "in defensive mode at %lx\n",curAddr);
1640                 }
1641                 break;
1642             } else if (unlikely(func->obj()->defensiveMode())) {
1643                 if (!_pcb.hasWeirdInsns(func) && ah->isGarbageInsn()) {
1644                     // add instrumentation at this addr so we can
1645                     // extend the function if this really executes
1646                     ParseCallback::default_details det(
1647                             (unsigned char*) cur->region()->getPtrToInstruction(cur->lastInsnAddr()),
1648                             cur->end() - cur->lastInsnAddr(),
1649                             true);
1650                     _pcb.abruptEnd_cf(cur->lastInsnAddr(),cur,&det);
1651                     _pcb.foundWeirdInsns(func);
1652                     end_block(cur,ahPtr);
1653                     // allow invalid instructions to end up as a sink node.
1654                     link(cur, _sink, DIRECT, true);
1655                     break;
1656                 } else if (ah->isNopJump()) {
1657                     // patch the jump to make it a nop, and re-set the 
1658                     // instruction adapter so we parse the instruction
1659                     // as a no-op this time, allowing the subsequent
1660                     // instruction to be parsed correctly
1661                     mal_printf("Nop jump at %lx, changing it to nop\n",ah->getAddr());
1662                     _pcb.patch_nop_jump(ah->getAddr());
1663                     unsigned bufsize =
1664                             func->region()->offset() + func->region()->length() - ah->getAddr();
1665                     func->region()->offset() + func->region()->length() - ahPtr->getAddr();
1666                     const unsigned char* bufferBegin = (const unsigned char *)
1667                             (func->isrc()->getPtrToInstruction(ah->getAddr()));
1668                     dec = InstructionDecoder
1669                             (bufferBegin, bufsize, frame.codereg->getArch());
1670                     ah->reset(dec, curAddr, func->obj(),
1671                               func->region(), func->isrc(), cur);
1672                 } else {
1673                     entryID id = ah->getInstruction().getOperation().getID();
1674                     switch (id) {
1675                         case e_rdtsc:
1676                             fprintf(stderr,"parsed bluepill insn rdtsc at %lx\n",curAddr);
1677                             break;
1678                         case e_sldt:
1679                             fprintf(stderr,"parsed bluepill insn sldt at %lx\n",curAddr);
1680                             break;
1681                         default:
1682                             break;
1683                     }
1684                 }
1685             } else {
1686                 // default
1687             }
1688
1689             /** Check for overruns of valid address space **/
1690             if (!is_code(func,ah->getNextAddr())) {
1691                 parsing_printf("[%s] next insn %lx is invalid\n",
1692                                FILE__,ah->getNextAddr());
1693
1694                 end_block(cur,ahPtr);
1695                 // We need to tag the block with a sink edge
1696                 link(cur, _sink, DIRECT, true);
1697                 break;
1698             } else if (!cur->region()->contains(ah->getNextAddr())) {
1699                 parsing_printf("[%s] next address %lx is outside [%lx,%lx)\n",
1700                                FILE__,ah->getNextAddr(),
1701                                cur->region()->offset(),
1702                                cur->region()->offset()+cur->region()->length());
1703                 end_block(cur,ahPtr);
1704                 // We need to tag the block with a sink edge
1705                 link(cur, _sink, DIRECT, true);
1706                 break;
1707             }
1708             ah->advance();
1709         }
1710     }
1711     if (ahPtr) delete ahPtr;
1712     // Check if parsing is complete
1713     if (!frame.delayedWork.empty()) {
1714         frame.set_status(ParseFrame::FRAME_DELAYED);
1715         return;
1716     }
1717
1718     /** parsing complete **/
1719     if (HASHDEF(plt_entries,frame.func->addr())) {
1720 //        if (obj().cs()->nonReturning(frame.func->addr())) {
1721         if (obj().cs()->nonReturning(plt_entries[frame.func->addr()])) {
1722             frame.func->set_retstatus(NORETURN);
1723         } else {
1724             frame.func->set_retstatus(UNKNOWN);
1725         }
1726
1727         // Convenience -- adopt PLT name
1728         frame.func->_name = plt_entries[frame.func->addr()];
1729     } else if (frame.func->retstatus() == UNSET) {
1730         frame.func->set_retstatus(NORETURN);
1731     }
1732
1733     frame.set_status(ParseFrame::PARSED);
1734
1735     if (unlikely(obj().defensiveMode())) {
1736         // calculate this after setting the function to PARSED, so that when
1737         // we finalize the function we'll actually save the results and won't
1738         // re-finalize it
1739         func->tampersStack();
1740     }
1741     _pcb.newfunction_retstatus( func );
1742 }
1743
1744 void
1745 Parser::end_block(Block * b, InstructionAdapter_t * ah)
1746 {
1747     b->_lastInsn = ah->getAddr();
1748     b->updateEnd(ah->getNextAddr());
1749 //    record_block(b);
1750 }
1751
1752 void
1753 Parser::record_block(Block *b)
1754 {
1755     parsing_printf("[%s:%d] recording block [%lx,%lx)\n",
1756                    FILE__,__LINE__,b->start(),b->end());
1757     _parse_data->record_block(b->region(),b);
1758 }
1759
1760
1761 Block *
1762 Parser::block_at(ParseFrame &frame,
1763         Function * owner,
1764         Address addr,
1765         Block * & split)
1766 {
1767 //    ScopeLock<Mutex<true> > l(work_mutex);
1768
1769     Block * exist = NULL;
1770     Block * ret = NULL;
1771     Block * inconsistent = NULL;
1772     Address prev_insn = 0;
1773
1774     split = NULL;
1775     CodeRegion *cr;
1776     if(owner->region()->contains(addr))
1777         cr = owner->region();
1778     else
1779         cr = _parse_data->reglookup(owner->region(),addr);
1780
1781
1782     if(!is_code(owner,addr)) {
1783         parsing_printf("[%s] block address %lx rejected by isCode()\n",
1784                        FILE__,addr);
1785         return NULL;
1786     }
1787
1788
1789     {
1790 #ifdef ENABLE_RACE_DETECTION
1791     // this lock causes deadlock when running in parallel, but it is
1792     // useful for suppressing unimportant races on the iterator
1793     boost::lock_guard<Function> g(*owner);
1794 #endif
1795     for (auto iter = frame.leadersToBlock.begin(); iter != frame.leadersToBlock.end(); ++iter)
1796 //    for(auto i = owner->blocks_begin();
1797 //        i != owner->blocks_end();
1798 //        ++i)
1799     {
1800         Block* b = iter->second;
1801         if(b->start() == addr) return b;
1802         Address prev_insn;
1803         if (b->consistent(addr, prev_insn)) {
1804             ret = split_block(owner, b, addr, prev_insn);
1805             split = b;
1806             frame.visited[ret->start()] = true;
1807             return ret;
1808         }
1809     }
1810         ret = _cfgfact._mkblock(owner, cr, addr);
1811         record_block(ret);
1812         return ret;
1813     }
1814 }
1815
1816 pair<Block *,Edge *>
1817 Parser::add_edge(
1818         ParseFrame & frame,
1819         Function * owner,
1820         Block * src,
1821         Address dst,
1822         EdgeTypeEnum et,
1823         Edge * exist)
1824 {
1825     // adjust anything pointing to zero to force it to sink
1826     if(dst == 0) { dst = -1; }
1827     Block * split = NULL;
1828     Block * ret = NULL;
1829     Block * original_block = NULL;
1830     Edge * newedge = NULL;
1831     pair<Block *, Edge *> retpair((Block *) NULL, (Edge *) NULL);
1832
1833     if(!is_code(owner,dst)) {
1834         parsing_printf("[%s:%d] target address %lx rejected by isCode()\n",
1835             FILE__, __LINE__, dst);
1836         return retpair;
1837     }
1838
1839     ret = block_at(frame, owner,dst, split);
1840     retpair.first = ret;
1841
1842     if(split == src) {
1843         // special case -- same block
1844         src = ret;
1845     }
1846
1847     if(et != CALL && frame.leadersToBlock.find(ret->start()) == frame.leadersToBlock.end()) {
1848         // If we created a new block, and this is not a function call,
1849         // then the new block is part of the current function.
1850         // We need to mark it
1851         frame.leadersToBlock[ret->start()] = ret;
1852     }
1853
1854     if(NULL == exist) {
1855         newedge = link(src,ret,et,false);
1856         retpair.second = newedge;
1857     } else {
1858         relink(exist,src,ret);
1859         retpair.second = exist;
1860     }
1861
1862     return retpair;
1863 }
1864
/*
 * Split block b at addr, producing a new trailing block [addr, b->end()).
 *
 * The original block b is truncated to [b->start(), addr) with its last
 * instruction set to previnsn, and a FALLTHROUGH edge links b to the new
 * block. All of b's outgoing edges are moved to the new block. Callers
 * are responsible for ensuring addr is an instruction boundary within b
 * (see the commented-out consistency assert below).
 *
 * Returns the newly created trailing block.
 */
Block *
Parser::split_block(
        Function * owner,
        Block *b,
        Address addr,
        Address previnsn)
{
    Block * ret;
    CodeRegion * cr;
    bool isRetBlock = false;
    // Resolve the region containing b, preferring the owner's region.
    if(owner->region()->contains(b->start()))
        cr = owner->region();
    else
        cr = _parse_data->reglookup(owner->region(),b->start());
    region_data * rd = _parse_data->findRegion(cr);

    // enable for extra-safe testing, but callers are responsible
    // assert(b->consistent(addr);

    // New block covering the trailing half [addr, b->end()).
    ret = factory()._mkblock(owner,cr,addr);

    // move out edges

    {

        // Re-source every outgoing edge of b onto the new block.
        Block::edgelist &trgs = b->_trglist;
        Block::edgelist::iterator tit = trgs.begin();
        for (; tit != trgs.end(); ++tit) {
            Edge *e = *tit;
            e->_source = ret;
            ret->_trglist.push_back(e);
        }
        // Remember whether b ended in a return; used below to flush
        // cached return-block lists of owning functions.
        if (!trgs.empty() && RET == (*trgs.begin())->type()) {
            isRetBlock = true;
        }
        trgs.clear();
        // The new block inherits b's old extent and last instruction...
        ret->updateEnd(b->end());
        ret->_lastInsn = b->_lastInsn;
        ret->_parsed = true;
        // ...while b is truncated to end at the split point.
        b->updateEnd(addr);
        b->_lastInsn = previnsn;
        link(b,ret,FALLTHROUGH,false);

        record_block(ret);

        // b's range has changed
        //rd->updateBlockEnd(b, addr, previnsn);
        // Any functions holding b that have already been finalized
        // need to have their caches invalidated so that they will
        // find out that they have this new 'ret' block
        std::set<Function*> prev_owners;
        rd->findFuncs(b->start(),prev_owners);
        for(std::set<Function*>::iterator oit = prev_owners.begin();
            oit != prev_owners.end(); ++oit)
        {
            Function * po = *oit;
            if (po->_cache_valid) {
                po->_cache_valid = false;
                parsing_printf("[%s:%d] split of [%lx,%lx) invalidates cache of "
                                       "func at %lx\n",
                               FILE__,__LINE__,b->start(),b->end(),po->addr());
            }
            if (isRetBlock) {
                po->_retBL.clear(); //could remove b from the vector instead of clearing it, not sure what's cheaper
            }
        }
        // KEVINTODO: study performance impact of this callback
        _pcb.splitBlock(b,ret);
    }

    return ret;
}
1937
1938
1939 pair<Function *,Edge*>
1940 Parser::bind_call(ParseFrame & frame, Address target, Block * cur, Edge * exist)
1941 {
1942     Function * tfunc = NULL;
1943     Block * tblock = NULL;
1944
1945     // look it up
1946     tfunc = _parse_data->findFunc(frame.codereg,target);
1947     if(!tfunc) {
1948         parsing_printf("[%s:%d] can't bind call to %lx\n",
1949                        FILE__,__LINE__,target);
1950         return pair<Function*,Edge*>((Function *) NULL,exist);
1951     }
1952     assert(tfunc->entry());
1953     
1954     relink(exist,cur,tfunc->entry());
1955     return pair<Function*,Edge*>(tfunc,exist);
1956 }
1957
1958 Function *
1959 Parser::findFuncByEntry(CodeRegion *r, Address entry)
1960 {
1961     if(_parse_state < PARTIAL) {
1962         parsing_printf("[%s:%d] Parser::findFuncByEntry([%lx,%lx),%lx) "
1963                                "forced parsing\n",
1964                        FILE__,__LINE__,r->low(),r->high(),entry);
1965         parse();
1966     }
1967     return _parse_data->findFunc(r,entry);
1968 }
1969
1970 int
1971 Parser::findFuncs(CodeRegion *r, Address addr, set<Function *> & funcs)
1972 {
1973     if(_parse_state < COMPLETE) {
1974         parsing_printf("[%s:%d] Parser::findFuncs([%lx,%lx),%lx,...) "
1975                                "forced parsing\n",
1976                        FILE__,__LINE__,r->low(),r->high(),addr);
1977         parse();
1978     }
1979     if(_parse_state < FINALIZED) {
1980         parsing_printf("[%s:%d] Parser::findFuncs([%lx,%lx),%lx,...) "
1981                                "forced finalization\n",
1982                        FILE__,__LINE__,r->low(),r->high(),addr);
1983         finalize();
1984     }
1985     return _parse_data->findFuncs(r,addr,funcs);
1986 }
1987
1988 int
1989 Parser::findFuncs(CodeRegion *r, Address start, Address end, set<Function *> & funcs)
1990 {
1991     if(_parse_state < COMPLETE) {
1992         parsing_printf("[%s:%d] Parser::findFuncs([%lx,%lx),%lx,%lx) "
1993                                "forced parsing\n",
1994                        FILE__,__LINE__,r->low(),r->high(),start,end);
1995         parse();
1996     }
1997     if(_parse_state < FINALIZED) {
1998         parsing_printf("[%s:%d] Parser::findFuncs([%lx,%lx),%lx,%lx) "
1999                                "forced finalization\n",
2000                        FILE__,__LINE__,r->low(),r->high(),start,end);
2001         finalize();
2002     }
2003     return _parse_data->findFuncs(r,start,end,funcs);
2004 }
2005
2006 Block *
2007 Parser::findBlockByEntry(CodeRegion *r, Address entry)
2008 {
2009     if(_parse_state < PARTIAL) {
2010         parsing_printf("[%s:%d] Parser::findBlockByEntry([%lx,%lx),%lx) "
2011                                "forced parsing\n",
2012                        FILE__,__LINE__,r->low(),r->high(),entry);
2013         parse();
2014     }
2015     return _parse_data->findBlock(r,entry);
2016 }
2017
2018 Block *
2019 Parser::findNextBlock(CodeRegion *r, Address addr)
2020 {
2021     if(_parse_state < PARTIAL) {
2022         parsing_printf("[%s:%d] Parser::findBlockByEntry([%lx,%lx),%lx) "
2023                                "forced parsing\n",
2024                        FILE__,__LINE__,r->low(),r->high(),addr);
2025         parse();
2026     }
2027     return _parse_data->findRegion(r)->get_next_block(addr).second;
2028 }
2029
2030 int
2031 Parser::findBlocks(CodeRegion *r, Address addr, set<Block *> & blocks)
2032 {
2033     if(_parse_state < COMPLETE) {
2034         parsing_printf("[%s:%d] Parser::findBlocks([%lx,%lx),%lx,...) "
2035                                "forced parsing\n",
2036                        FILE__,__LINE__,r->low(),r->high(),addr);
2037         parse();
2038     }
2039     return _parse_data->findBlocks(r,addr,blocks);
2040 }
2041
2042 // find blocks without parsing.
2043 int Parser::findCurrentBlocks(CodeRegion* cr, Address addr,
2044                               std::set<Block*>& blocks) {
2045     return _parse_data->findBlocks(cr, addr, blocks);
2046 }
2047
2048 int Parser::findCurrentFuncs(CodeRegion * cr, Address addr, std::set<Function*> &funcs) {
2049     return _parse_data->findFuncs(cr, addr, funcs);
2050 }
2051
2052 Edge*
2053 Parser::link(Block *src, Block *dst, EdgeTypeEnum et, bool sink)
2054 {
2055     assert(et != NOEDGE);
2056     Edge * e = factory()._mkedge(src,dst,et);
2057     e->_type._sink = sink;
2058     src->addTarget(e);
2059     dst->addSource(e);
2060     _pcb.addEdge(src, e, ParseCallback::target);
2061     _pcb.addEdge(dst, e, ParseCallback::source);
2062     return e;
2063 }
2064
2065 /* 
2066  * During parsing, all edges are temporarily linked to the _tempsink
2067  * block. Adding and then removing edges to and from this block is
2068  * wasteful, especially given that removal is an O(N) operation with
2069  * vector storage. This call thus does not record edges into the sink.
2070  * These edges are guaranteed to have been relinked when parsing is
2071  * in state COMPLETE.
2072  *
2073  * NB This introduces an inconsistency into the CFG invariant that all
2074  *    targets of an edge have that edge in their source list if the
2075  *    data structures are queried during parsing. Extenders of 
2076  *    parsing callbacks are the only ones who can see this state;
2077  *    they have been warned and should know better.
2078  */
2079 Edge*
2080 Parser::link_tempsink(Block *src, EdgeTypeEnum et)
2081 {
2082     Block* tmpsink = parse_data()->findBlock(src->region(), std::numeric_limits<Address>::max());
2083     Edge * e = factory()._mkedge(src, tmpsink,et);
2084     e->_type._sink = true;
2085     src->_trglist.push_back(e);
2086     return e;
2087 }
2088
/*
 * Retarget an existing edge e so that it runs from src to dst, updating
 * block edge lists, parse callbacks, and the edge's sink flag. Used to
 * convert temporary sink edges (see link_tempsink above) into real edges.
 */
void
Parser::relink(Edge * e, Block *src, Block *dst)
{
    assert(e);
    unsigned long srcOut = 0, dstIn = 0, oldDstIn = 0;
    // oldDst is non-NULL only if the edge previously pointed at a real
    // (non-sink) block; sink targets are keyed at the max address.
    Block* oldDst = NULL;
    if(e->trg() && e->trg_addr() != std::numeric_limits<Address>::max()) {
        oldDst = e->trg();
    }
    bool addSrcAndDest = true;
    if(src != e->src()) {
        // Move the edge out of the old source's target list and into
        // the new source's, notifying callbacks both ways.
        e->src()->removeTarget(e);
        _pcb.removeEdge(e->src(), e, ParseCallback::target);
        e->_source = src;
        src->addTarget(e);
        _pcb.addEdge(src, e, ParseCallback::target);
    }
    if(dst != e->trg()) {
        if(oldDst) // old edge was not sink
        {
            oldDst->removeSource(e);
            _pcb.removeEdge(e->trg(), e, ParseCallback::source);
            addSrcAndDest = false;
        }
        e->_target_off = dst->start();
        dst->addSource(e);
        _pcb.addEdge(dst, e, ParseCallback::source);
    }
    if (addSrcAndDest) {
        // We're re-linking a sinkEdge to be a non-sink edge; since
        // we don't inform PatchAPI of temporary sinkEdges, we have
        // to add both the source AND target edges
        // NOTE(review): if the source also changed above, the target-side
        // addEdge has already fired once — confirm callbacks tolerate a
        // duplicate notification in that case.
        _pcb.addEdge(src, e, ParseCallback::target);
    }
    // Make sure the (possibly new) destination block is recorded in the
    // region's parse data.
    if(parse_data()->findBlock(dst->region(), dst->start()) != dst) {
        record_block(dst);
    }
    // An edge is a sink edge iff it targets the max-address sink block.
    e->_type._sink = (dst->start() == std::numeric_limits<Address>::max());
}
2128
2129 ParseFrame::Status
2130 Parser::frame_status(CodeRegion * cr, Address addr)
2131 {
2132     // XXX parsing frames may have been cleaned up, but status
2133     //     is always cached
2134     return _parse_data->frameStatus(cr,addr);
2135 }
2136
2137 void
2138 Parser::remove_func(Function *func)
2139 {
2140     if (sorted_funcs.end() != sorted_funcs.find(func)) {
2141         sorted_funcs.erase(func);
2142     }
2143     if (HINT == func->src()) {
2144         for (unsigned fidx=0; fidx < hint_funcs.size(); fidx++) {
2145             if (hint_funcs[fidx] == func) {
2146                 hint_funcs[fidx] = hint_funcs[hint_funcs.size()-1];
2147                 hint_funcs.pop_back();
2148                 break;
2149             }
2150         }
2151     }
2152     else {
2153         for (unsigned fidx=0; fidx < discover_funcs.size(); fidx++) {
2154             if (discover_funcs[fidx] == func) {
2155                 discover_funcs[fidx] = discover_funcs[discover_funcs.size()-1];
2156                 discover_funcs.pop_back();
2157                 break;
2158             }
2159         }
2160     }
2161
2162     _parse_data->remove_func(func);
2163 }
2164
/*
 * Remove a block from the parse data.
 *
 * NOTE(review): boost::adopt_lock means this guard assumes the caller
 * already holds the ParseData lock and only releases it on scope exit —
 * confirm all call sites uphold that precondition.
 */
void
Parser::remove_block(Dyninst::ParseAPI::Block *block)
{
    boost::lock_guard<ParseData> g(*_parse_data, boost::adopt_lock);
    _parse_data->remove_block(block);
}
2171
2172 void Parser::move_func(Function *func, Address new_entry, CodeRegion *new_reg)
2173 {
2174     region_data *reg_data = _parse_data->findRegion(func->region());
2175
2176     race_detector_fake_lock_acquire(race_detector_fake_lock(reg_data->funcsByAddr));
2177     {
2178       tbb::concurrent_hash_map<Address, Function*>::accessor a;
2179       if(reg_data->funcsByAddr.find(a, func->addr()))
2180         {
2181           reg_data->funcsByAddr.erase(a);
2182         }
2183       reg_data = _parse_data->findRegion(new_reg);
2184       reg_data->funcsByAddr.insert(a, make_pair(new_entry, func));
2185     }
2186     race_detector_fake_lock_release(race_detector_fake_lock(reg_data->funcsByAddr));
2187 }
2188
2189 void Parser::invalidateContainingFuncs(Function *owner, Block *b)
2190 {
2191     CodeRegion * cr;
2192     if(owner->region()->contains(b->start()))
2193         cr = owner->region();
2194     else
2195         cr = _parse_data->reglookup(owner->region(),b->start());
2196     region_data * rd = _parse_data->findRegion(cr);
2197
2198
2199     // Any functions holding b that have already been finalized
2200     // need to have their caches invalidated so that they will
2201     // find out that they have this new 'ret' block
2202     std::set<Function*> prev_owners;
2203     rd->findFuncs(b->start(),prev_owners);
2204     for(std::set<Function*>::iterator oit = prev_owners.begin();
2205         oit != prev_owners.end(); ++oit)
2206     {
2207         Function * po = *oit;
2208         po->_cache_valid = false;
2209         parsing_printf("[%s:%d] split of [%lx,%lx) invalidates cache of "
2210                                "func at %lx\n",
2211                        FILE__,__LINE__,b->start(),b->end(),po->addr());
2212     }
2213 }
2214
2215 /* Add ParseFrames waiting on func back to the work queue */
2216 void Parser::resumeFrames(Function * func, LockFreeQueue<ParseFrame *> & work)
2217 {
2218     // If we do not know the function's return status, don't put its waiters back on the worklist
2219     if (func->retstatus() == UNSET) {
2220         parsing_printf("[%s] %s return status unknown, cannot resume waiters\n",
2221                        __FILE__,
2222                        func->name().c_str());
2223         return;
2224     }
2225     boost::lock_guard<DelayedFrames> g(delayed_frames);
2226
2227     // When a function's return status is set, all waiting frames back into the worklist
2228     map<Function *, set<ParseFrame *> >::iterator iter = delayed_frames.frames.find(func);
2229     if (iter == delayed_frames.frames.end()) {
2230         // There were no frames waiting, ignore
2231         parsing_printf("[%s] %s return status %d, no waiters\n",
2232                        __FILE__,
2233                        func->name().c_str(),
2234                        func->retstatus());
2235         return;
2236     } else {
2237         parsing_printf("[%s] %s return status %d, undelaying waiting functions\n",
2238                        __FILE__,
2239                        func->name().c_str(),
2240                        func->retstatus());
2241         // Add each waiting frame back to the worklist
2242         set<ParseFrame *> vec = iter->second;
2243         for (set<ParseFrame *>::iterator fIter = vec.begin();
2244              fIter != vec.end();
2245              ++fIter) {
2246             work.insert(*fIter);
2247         }
2248         // remove func from delayedFrames map
2249         delayed_frames.frames.erase(func);
2250     }
2251 }
2252
2253 bool Parser::getSyscallNumber(Function * /*func*/,
2254                               Block * block,
2255                               Address /*addr*/,
2256                               Architecture arch,
2257                               long int & val)
2258 {
2259     val = -1;
2260
2261     // In the common case, the setup of system call number
2262     // is a mov in the previous instruction. We don't currently look elsewhere.
2263     // In the future, could use slicing and symeval to find the value of
2264     // this register at the system call (as unstrip does).
2265     Block::Insns blockInsns;
2266     block->getInsns(blockInsns);
2267     auto prevPair = ++(blockInsns.rbegin());
2268     InstructionAPI::Instruction prevInsn = prevPair->second;
2269     if (prevInsn.getOperation().getID() != e_mov) {
2270         return false;
2271     }
2272
2273     MachRegister syscallNumberReg = MachRegister::getSyscallNumberReg(arch);
2274     if (syscallNumberReg == InvalidReg) {
2275         return false;
2276     }
2277     InstructionAPI::RegisterAST* regAST = new InstructionAPI::RegisterAST(syscallNumberReg);
2278     InstructionAPI::RegisterAST::Ptr regASTPtr = InstructionAPI::RegisterAST::Ptr(regAST);
2279
2280     std::vector<InstructionAPI::Operand> operands;
2281     prevInsn.getOperands(operands);
2282     for (unsigned i = 0; i < operands.size(); i++) {
2283         if (!operands[i].isWritten(regASTPtr)) {
2284             InstructionAPI::Expression::Ptr value = operands[i].getValue();
2285             InstructionAPI::Result res = value->eval();
2286             if (res.defined) {
2287                 val = res.convert<Offset>();
2288             }
2289         }
2290     }
2291
2292     return (val != -1);
2293 }
2294