Defer constructing blocksByRange to function finalizing, and remove the use of blocksB...
[dyninst.git] / parseAPI / src / Parser.C
1 /*
2  * See the dyninst/COPYRIGHT file for copyright information.
3  * 
4  * We provide the Paradyn Tools (below described as "Paradyn")
5  * on an AS IS basis, and do not warrant its validity or performance.
6  * We reserve the right to update, modify, or discontinue this
7  * software at any time.  We shall have no obligation to supply such
8  * updates or modifications or any other form of support to you.
9  * 
10  * By your use of Paradyn, you understand and agree that we (or any
11  * other person or entity with proprietary rights in Paradyn) are
12  * under no obligation to provide either maintenance services,
13  * update services, notices of latent defects, or correction of
14  * defects for Paradyn.
15  * 
16  * This library is free software; you can redistribute it and/or
17  * modify it under the terms of the GNU Lesser General Public
18  * License as published by the Free Software Foundation; either
19  * version 2.1 of the License, or (at your option) any later version.
20  * 
21  * This library is distributed in the hope that it will be useful,
22  * but WITHOUT ANY WARRANTY; without even the implied warranty of
23  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
24  * Lesser General Public License for more details.
25  * 
26  * You should have received a copy of the GNU Lesser General Public
27  * License along with this library; if not, write to the Free Software
28  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
29  */
30
31 #include <omp.h>
32
33 #include <vector>
34 #include <limits>
35 #include <algorithm>
36 // For Mutex
37 #define PROCCONTROL_EXPORTS
38
39 #include "dyntypes.h"
40
41 #include "CodeObject.h"
42 #include "CFGFactory.h"
43 #include "ParseCallback.h"
44 #include "Parser.h"
46 #include "CFG.h"
47 #include "util.h"
48 #include "debug_parse.h"
49
50 #include <boost/bind/bind.hpp>
51
52
53 #include <boost/timer/timer.hpp>
54 #include <fstream>
55
56 #ifdef ENABLE_RACE_DETECTION
57 #define USE_CILK 1
58 #else
59 #define USE_OPENMP 1
60 #endif
61
62 #if USE_CILK
63 #include <cilk/cilk.h>
64 #include <cilk/cilk_api.h>
65 #endif
66
67 #include "race-detector-annotations.h"
68
69 using namespace std;
70 using namespace Dyninst;
71 using namespace Dyninst::ParseAPI;
72 using namespace Dyninst::InstructionAPI;
73
74 typedef std::pair< Address, EdgeTypeEnum > edge_pair_t;
75 typedef vector< edge_pair_t > Edges_t;
76
77 #include "common/src/dthread.h"
78
79 namespace {
80     struct less_cr {
81         bool operator()(CodeRegion * x, CodeRegion * y)
82         {
83             return x->offset() < y->offset();
84         }
85     };
86 }
87
88 Parser::Parser(CodeObject & obj, CFGFactory & fact, ParseCallbackManager & pcb) :
89         _obj(obj),
90         _cfgfact(fact),
91         _pcb(pcb),
92         _parse_data(NULL),
93         _parse_state(UNPARSED)
94 {
95     delayed_frames.size = 0;
96     // cache plt entries for fast lookup
97     const map<Address, string> & lm = obj.cs()->linkage();
98     map<Address, string>::const_iterator lit = lm.begin();
99     for( ; lit != lm.end(); ++lit) {
100         parsing_printf("Cached PLT entry %s (%lx)\n",lit->second.c_str(),lit->first);
101         plt_entries[lit->first] = lit->second;
102     }
103
104     if(obj.cs()->regions().empty()) {
105         parsing_printf("[%s:%d] CodeSource provides no CodeRegions"
106                                " -- unparseable\n",
107                        FILE__,__LINE__);
108         _parse_state = UNPARSEABLE;
109         return;
110     }
111
112     // check whether regions overlap
113     vector<CodeRegion *> const& regs = obj.cs()->regions();
114     vector<CodeRegion *> copy(regs.begin(),regs.end());
115     sort(copy.begin(),copy.end(),less_cr());
116
117     // allocate a sink block -- region is arbitrary
118     _sink = new Block(&_obj, regs[0], std::numeric_limits<Address>::max());
119
120     bool overlap = false;
121     CodeRegion * prev = copy[0], *cur = NULL;
122     for(unsigned i=1;i<copy.size();++i) {
123         cur = copy[i];
124         if(cur->offset() < prev->offset() + prev->length()) {
125             parsing_printf("Overlapping code regions [%lx,%lx) and [%lx,%lx)\n",
126                            prev->offset(),prev->offset()+prev->length(),
127                            cur->offset(),cur->offset()+cur->length());
128             overlap = true;
129             break;
130         }
            prev = cur;  // compare each region with its immediate predecessor
131     }
132
133     if(overlap)
134         _parse_data = new OverlappingParseData(this,copy);
135     else
136         _parse_data = new StandardParseData(this);
137 }
138
139 ParseFrame::~ParseFrame()
140 {
141     cleanup(); // error'd frames still need cleanup
142 }
143
144 Parser::~Parser()
145 {
146     if(_parse_data)
147         delete _parse_data;
148
149     for(auto fit = frames.begin() ; fit != frames.end(); ++fit)
150         delete *fit;
151
152     frames.clear();
153 }
154
155 void
156 Parser::add_hint(Function * f)
157 {
158     if(!_parse_data->findFunc(f->region(),f->addr()))
159         record_func(f);
160 }
161
162 template <typename T>
163 std::string pair_to_string(T pr)
164 {
165     std::stringstream s;
166     s << pr.first << ", " << pr.second;
167     return s.str();
168 }
169
170
171 void
172 Parser::parse()
173 {
174     parsing_printf("[%s:%d] parse() called on Parser %p with state %d\n",
175                    FILE__,__LINE__,this, _parse_state);
176
177     // For modification: once we've full-parsed once, don't do it again
178     if (_parse_state >= COMPLETE) return;
179
180     ScopeLock<Mutex<true> > L(parse_mutex);
181     parse_vanilla();
182     finalize();
183     // anything else by default...?
184
185     if(_parse_state < COMPLETE)
186         _parse_state = COMPLETE;
187
188     parsing_printf("[%s:%d] parsing complete for Parser %p with state %d\n", FILE__, __LINE__, this, _parse_state);
189 #ifdef ADD_PARSE_FRAME_TIMERS
190     std::ofstream stat_log("functions.csv");
191     stat_log << "Results for " << time_histogram.size() << " buckets\n";
192     stat_log << "usecs,count\n";
193     std::transform(time_histogram.begin(), time_histogram.end(),
194                    std::ostream_iterator<std::string >(stat_log, "\n"),
195                    pair_to_string<std::pair<unsigned int, unsigned int> >);
196 #endif
197 }
198
199 void
200 Parser::parse_at(
201         CodeRegion * region,
202         Address target,
203         bool recursive,
204         FuncSource src)
205 {
206     Function *f;
207     ParseFrame *pf;
208     LockFreeQueue<ParseFrame *> work;
209
210     parsing_printf("[%s:%d] entered parse_at([%lx,%lx),%lx)\n",
211                    FILE__,__LINE__,region->low(),region->high(),target);
212
213     if(!region->contains(target)) {
214         parsing_printf("\tbad address, bailing\n");
215         return;
216     }
217
218     if(_parse_state < PARTIAL)
219         _parse_state = PARTIAL;
220     f = _parse_data->createAndRecordFunc(region, target, src);
221     if (f == NULL)
222         f = _parse_data->findFunc(region,target);
223     if(!f) {
224         parsing_printf("   could not create function at %lx\n",target);
225         return;
226     }
227
228     ParseFrame::Status exist = _parse_data->frameStatus(region,target);
229     if(exist != ParseFrame::BAD_LOOKUP && exist != ParseFrame::UNPARSED) {
230         parsing_printf("   function at %lx already parsed, status %d\n",
231                        target, exist);
232         return;
233     }
234     pf = _parse_data->createAndRecordFrame(f);
235     if (pf != NULL) {
236         frames.insert(pf);
237     } else {
238         pf = _parse_data->findFrame(region, target);
239     }
240     if (pf->func->entry())
241         work.insert(pf);
242     parse_frames(work,recursive);
243
244     // downgrade state if necessary
245     if(_parse_state > COMPLETE)
246         _parse_state = COMPLETE;
247
248 }
249
250 void
251 Parser::parse_at(Address target, bool recursive, FuncSource src)
252 {
253     CodeRegion * region = NULL;
254
255     parsing_printf("[%s:%d] entered parse_at(%lx)\n",FILE__,__LINE__,target);
256
257     if(_parse_state == UNPARSEABLE)
258         return;
259
260     StandardParseData * spd = dynamic_cast<StandardParseData *>(_parse_data);
261     if(!spd) {
262         parsing_printf("   parse_at is invalid on overlapping regions\n");
263         return;
264     }
265
266     region = spd->reglookup(region,target); // input region ignored for SPD
267     if(!region) {
268         parsing_printf("   failed region lookup at %lx\n",target);
269         return;
270     }
271
272     parse_at(region,target,recursive,src);
273 }
274
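// Full parse: seed a ParseFrame for each hinted function (added via add_hint) and parse recursively.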
275 void
276 Parser::parse_vanilla()
277 {
278     LockFreeQueue<ParseFrame *> work;
279
280     parsing_printf("[%s:%d] entered parse_vanilla()\n",FILE__,__LINE__);
281     parsing_printf("\t%d function hints\n",hint_funcs.size());
282
283     if(_parse_state < PARTIAL)
284         _parse_state = PARTIAL;
285     else
286         parsing_printf("\tparse state is %d, some parsing already done\n",
287                        _parse_state);
288
289     /* Initialize parse frames from hints */
290
291     // Note: there is no fundamental obstacle to parallelizing this loop. However,
292     // race conditions need to be resolved in supporting layers first.
293     for (unsigned int i = 0; i < hint_funcs.size(); i++) {
294         Function * hf = hint_funcs[i];
295         ParseFrame::Status test = frame_status(hf->region(),hf->addr());
296         if(test != ParseFrame::BAD_LOOKUP &&
297            test != ParseFrame::UNPARSED)
298         {
299             parsing_printf("\tskipping repeat parse of %lx [%s]\n",
300                            hf->addr(),hf->name().c_str());
301             continue;
302         }
303         
304         ParseFrame *pf = _parse_data->createAndRecordFrame(hf);
305         if (pf == NULL) {
306             pf = _parse_data->findFrame(hf->region(), hf->addr());
307         } else {
308             frames.insert(pf);
309         }
310         if (pf->func->entry())
311             work.insert(pf);
312     }
313
314     parse_frames(work,true);
315 }
316
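// Parse a caller-supplied set of edge work elements: find or create a ParseFrame for each source
// block's function, attach the work, then run the usual frame parsing and finalization over them.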
317 void
318 Parser::parse_edges( vector< ParseWorkElem * > & work_elems )
319 {
320     if(_parse_state == UNPARSEABLE)
321         return;
322
323     // build up set of needed parse frames and load them with work elements
324     set<ParseFrame*> frameset; // for dup checking
325     LockFreeQueue<ParseFrame*> frames;
326
327     for (unsigned idx=0; idx < work_elems.size(); idx++) {
328
329         ParseWorkElem *elem = work_elems[idx];
330         Block *src = elem->edge()->src();
331
332         if (elem->order() == ParseWorkElem::call_fallthrough)
333         {
334             Edge *callEdge = NULL;
335             boost::lock_guard<Block> g(*src);
336             Block::edgelist trgs = src->targets();
337             for (Block::edgelist::iterator eit = trgs.begin();
338                  eit != trgs.end();
339                  eit++)
340             {
341                 if ((*eit)->type() == CALL) {
342                     callEdge = *eit;
343                     if (!(*eit)->sinkEdge()) // if it's a sink edge, look for nonsink CALL
344                         break;
345                 }
346             }
347             // create a call work elem so that the bundle is complete
348             // and set the target function's return status and 
349             // tamper to RETURN and TAMPER_NONE, respectively
350             //assert(callEdge);
351
352             // When we have a direct call to unallocated memory or garbage
353             // there is no call edge when the call is first executed. I'm
354             // not sure why not, so I'm attempting to continue past here without
355             // fixing up the called function...
356             // Also, in the case I saw, the callee was _not_ returning. Not directly. It 
357             // led into a big case with a longjmp() equivalent. 
358
359             if (callEdge)
360             {
361                 bool isResolvable = false;
362                 Address callTarget = 0;
363                 if ( ! callEdge->sinkEdge() )
364                 {
365                     isResolvable = true;
366                     callTarget = callEdge->trg_addr();
367                     // the call target may be in another Code Object
368                     Function *callee = callEdge->trg()->obj()->findFuncByEntry(
369                             callEdge->trg()->region(), callTarget);
370                     assert(callee);
371                     callee->set_retstatus(RETURN);
372                     callee->_tamper = TAMPER_NONE;
373                 }
374                 elem->bundle()->add(new ParseWorkElem
375                                             ( elem->bundle(),
376                                               callEdge,
377                                               callTarget,
378                                               isResolvable,
379                                               false ));
380             }
381         }
382
383         vector<Function*> funcs;
384         src->getFuncs(funcs);
385         for (unsigned fix=1; fix < funcs.size(); fix++) {
386             // if the block is shared, all of its funcs need
387             // to add the new edge
388             funcs[fix]->_cache_valid = false;
389         }
390         Function * f = NULL;
391         if (funcs.size() >  0) {
392             // Choosing any function is fine
393             f = funcs[0];
394         } else {
395             f = _parse_data->createAndRecordFunc(src->region(), src->start(), RT);
396         }
397
398         ParseFrame *frame = _parse_data->createAndRecordFrame(f);
399         if (frame == NULL) {
400             frame = _parse_data->findFrame(src->region(), f->addr());
401         } 
402         if (elem->bundle()) {
403             frame->work_bundles.push_back(elem->bundle());
404         }
405         // push before frame init so no seed is added
406         frame->pushWork(elem);
407         if (frameset.end() == frameset.find(frame)) {
408             frameset.insert(frame);
409             frames.insert(frame);
410         }
411     }
412     ScopeLock<Mutex<true> > L(parse_mutex);
413     // now parse
414     if(_parse_state < PARTIAL)
415         _parse_state = PARTIAL;
416
417     parse_frames( frames, true );
418
419     if(_parse_state > COMPLETE)
420         _parse_state = COMPLETE;
421
422     finalize();
423
424 }
425
426
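// Parse a single frame while holding its busy flag (swap_busy) so that concurrent workers pulling
// a duplicate entry off the work list skip it; returns any follow-on frames found in post-processing.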
427 LockFreeQueueItem<ParseFrame*> *
428 Parser::ProcessOneFrame(ParseFrame* pf, bool recursive) {
429   LockFreeQueueItem<ParseFrame*> *frame_list = 0;
430   if (pf->func && !pf->swap_busy(true)) {
431     boost::lock_guard<ParseFrame> g(*pf);
432 #ifdef ADD_PARSE_FRAME_TIMERS
433     boost::timer::cpu_timer t;
434     t.start();
435 #endif
436     parse_frame(*pf,recursive);
437 #ifdef ADD_PARSE_FRAME_TIMERS
438     t.stop();
439     unsigned int msecs = floor(t.elapsed().wall / 1000000.0);
440     race_detector_fake_lock_acquire(race_detector_fake_lock(time_histogram));
441     {
442       tbb::concurrent_hash_map<unsigned int, unsigned int>::accessor a;
443       time_histogram.insert(a, msecs);
444       ++(a->second);
445     }
446     race_detector_fake_lock_release(race_detector_fake_lock(time_histogram));
447 #endif
448     frame_list = postProcessFrame(pf, recursive);
449
450     // exclusive access to each ParseFrame is mediated by marking a frame busy.
451     // we clear evidence of our access to the ParseFrame here because a concurrent
452     // thread may try to touch it because of duplicates in the work list. that
453     // won't actually be concurrent because of swap_busy. we suppress the race
454     // report by scrubbing information about our access.
455     race_detector_forget_access_history(pf, sizeof(*pf));
456
457     pf->swap_busy(false);
458   }
459   return frame_list;
460 }
461
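// Act on a frame's resulting status: queue the callee for CALL_BLOCKED frames, resume waiters of
// PARSED frames, record dependencies for FRAME_DELAYED frames, and return frames that became runnable.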
462 LockFreeQueueItem<ParseFrame *> *Parser::postProcessFrame(ParseFrame *pf, bool recursive) {
463     LockFreeQueue<ParseFrame*> work;
464     switch(pf->status()) {
465         case ParseFrame::CALL_BLOCKED: {
466             parsing_printf("[%s] frame %lx blocked at %lx\n",
467                            FILE__,pf->func->addr(),pf->curAddr);
468             {
469
470                 assert(pf->call_target);
471
472                 parsing_printf("    call target %lx\n",pf->call_target->addr());
473                 work.insert(pf);
474
475                 CodeRegion * cr = pf->call_target->region();
476                 Address targ = pf->call_target->addr();
477                 ParseFrame * tf = NULL;
478
479                 tf = _parse_data->createAndRecordFrame(pf->call_target);
480                 if (tf) {
481                     frames.insert(tf);
482                 }
483                 else {
484                     tf = _parse_data->findFrame(cr, targ);
485                 }
486                 // tf can still be NULL if the target frame is parsed and deleted
487
488                 // We put the frame at the front of the worklist, so 
489                 // the parser will parse the callee next
490                 if(tf && recursive && tf->func->entry())
491                     work.insert(tf);
492
493             }
494             resumeFrames(pf->func, work);
495             break;
496         }
497         case ParseFrame::PARSED:{
498             parsing_printf("[%s] frame %lx complete, return status: %d\n",
499                            FILE__,pf->func->addr(),pf->func->retstatus());
500
501             if (unlikely(_obj.defensiveMode() &&
502                          TAMPER_NONE != pf->func->tampersStack() &&
503                          TAMPER_NONZERO != pf->func->tampersStack() ))
504             {   // may adjust CALL_FT targets, add a frame to the worklist,
505                 // or trigger parsing in a separate target CodeObject
506                 tamper_post_processing(work,pf);
507             }
508
509             /* add waiting frames back onto the worklist */
510             resumeFrames(pf->func, work);
511
512             pf->cleanup();
513             break;
514         }
515         case ParseFrame::FRAME_ERROR:
516             parsing_printf("[%s] frame %lx error at %lx\n",
517                            FILE__,pf->func->addr(),pf->curAddr);
518             break;
519         case ParseFrame::FRAME_DELAYED:
520         {
521             boost::lock_guard<DelayedFrames> g(delayed_frames);
522             parsing_printf("[%s] frame %lx delayed at %lx\n",
523                            FILE__,
524                            pf->func->addr(),
525                            pf->curAddr);
526
527             if (pf->delayedWork.size()) {
528                 // Add frame to global delayed list
529
530                 for (auto iter = pf->delayedWork.begin();
531                      iter != pf->delayedWork.end();
532                      ++iter) {
533
534                     Function * ct = iter->second;
535                     parsing_printf("[%s] waiting on %s\n",
536                                    __FILE__,
537                                    ct->name().c_str());
538                     {
539
540                         auto fIter = delayed_frames.frames.find(ct);
541                         if (fIter == delayed_frames.frames.end()) {
542                             set<ParseFrame *> waiters;
543                             waiters.insert(pf);
544                             delayed_frames.frames[ct] = waiters;
545                         } else {
546                             delayed_frames.frames[ct].insert(pf);
547                         }
548                     }
549                 }
550             } else {
551                 // We shouldn't get here
552                 assert(0 && "Delayed frame with no delayed work");
553             }
554
555             /* if the return status of this function has been updated, add
556              * waiting frames back onto the work list */
557             resumeFrames(pf->func, work);
558             break;
559         }
560         case ParseFrame::PROGRESS:
561             // another thread is working on this already; possibly something else unblocked.
562             break;
563
564         default:
565             assert(0 && "invalid parse frame status");
566     }
567     return work.steal();
568 }
569
570
571 static void
572 InsertFrames
573 (
574  LockFreeQueueItem<ParseFrame *> *frames,
575  LockFreeQueue<ParseFrame *> *q
576 )
577 {
578   if (frames) {
579     LockFreeQueue<ParseFrame *> myq(frames);
580     q->splice(myq);
581   }
582 }
583
584
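// Drain the frame list, spawning one worker per frame (an OpenMP task or a Cilk spawn, per build config).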
585 void
586 Parser::LaunchWork
587 (
588  LockFreeQueueItem<ParseFrame*> *frame_list,
589  bool recursive
590 )
591 {
592   LockFreeQueue<ParseFrame *> private_queue(frame_list);
593   for(;;) {
594     LockFreeQueueItem<ParseFrame *> *first = private_queue.pop();
595     if (first == 0) break;
596     ParseFrame *frame = first->value();
597     delete first;
598 #if USE_OPENMP
599 #pragma omp task firstprivate(frame, recursive)
600     SpawnProcessFrame(frame, recursive);
601 #else
602     cilk_spawn SpawnProcessFrame(frame, recursive);
603 #endif
604   }
605 }
606
607
608 void
609 Parser::SpawnProcessFrame
610 (
611  ParseFrame *pf,
612  bool recursive
613 )
614 {
615   LockFreeQueueItem<ParseFrame*> *new_frames = ProcessOneFrame(pf, recursive);
616   LaunchWork(new_frames, recursive);
617 }
618
619 void print_work_queue(LockFreeQueue<ParseFrame *> *work_queue)
620 {
621   LockFreeQueueItem<ParseFrame *> *current = work_queue->peek();
622
623   std::cout << "Work Queue" << std::endl;
624   while (current) {
625     std::cout << "  parse frame " << std::hex << current->value()
626               << std::dec << std::endl;
627     current = current->next();
628   }
629 }
630
631
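// Top-level driver for frame processing: under OpenMP the master thread seeds the task pool inside a
// parallel region; otherwise frames are processed by direct spawning.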
632 void
633 Parser::ProcessFrames
634 (
635  LockFreeQueue<ParseFrame *> *work_queue,
636  bool recursive
637 )
638 {
639 #if USE_OPENMP
640 #pragma omp parallel shared(work_queue)
641   {
642 #pragma omp master
643     LaunchWork(work_queue->steal(), recursive);
644   }
645 #else
646   LaunchWork(work_queue->steal(), recursive);
647 #endif
648 }
649
650
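// Process the work queue, then revisit delayed frames: resume those whose callee's return status is now
// known; if nothing changed and the delayed set stopped shrinking, break the presumed cycle, else iterate.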
651 void
652 Parser::parse_frames(LockFreeQueue<ParseFrame *> &work, bool recursive)
653 {
654     ProcessFrames(&work, recursive);
655
656     bool done = false, cycle = false;
657     {
658         boost::lock_guard<DelayedFrames> g(delayed_frames);
659
660         // Check if we can resume any frames yet
661         vector<Function *> updated;
662         for (auto iter = delayed_frames.frames.begin();
663              iter != delayed_frames.frames.end();
664              ++iter) {
665             if (iter->first->retstatus() != UNSET) {
666                 updated.push_back(iter->first);
667             }
668         }
669         if (updated.size()) {
670             for (auto uIter = updated.begin();
671                  uIter != updated.end();
672                  ++uIter) {
673                 resumeFrames((*uIter), work);
674             }
675         }
676
677         if(delayed_frames.frames.empty() && updated.empty()) {
678             parsing_printf("[%s] Fixed point reached (0 funcs with unknown return status)\n",
679                            __FILE__);
680             delayed_frames.size = 0;
681             done = true;
682         } else if(delayed_frames.size == delayed_frames.frames.size() && updated.empty()) {
683             cycle = true;
684         }
685     }
686
687     // Use fixed-point to ensure we parse frames whose parsing had to be delayed
688     if (!done) {
689         if(cycle) {
690             processCycle(work,recursive);
691         } else {
692             processFixedPoint(work, recursive);
693         }
694     }
695
696     cleanup_frames();
697 }
698
699 void Parser::processFixedPoint(LockFreeQueue<ParseFrame *> &work, bool recursive) {
    // We haven't yet reached a fixed point; let's recurse
700     {
701         boost::lock_guard<DelayedFrames> g(delayed_frames);
702
703         parsing_printf("[%s] Fixed point not yet reached (%d funcs with unknown return status)\n",
704                        __FILE__,
705                        delayed_frames.frames.size());
706
707         // Update delayed_frames.size for next iteration
708         delayed_frames.size = delayed_frames.frames.size();
709     }
710     // Recurse through parse_frames
711     parsing_printf("[%s] Calling parse_frames again... \n", __FILE__);
712
713     parse_frames(work, recursive);
714 }
715
716 void Parser::processCycle(LockFreeQueue<ParseFrame *> &work, bool recursive) {
    // If we've reached a fixed point and have remaining frames, we must
717     // have a cyclic dependency
718     vector<Function *> updated;
719     {
720         boost::lock_guard<DelayedFrames> g(delayed_frames);
721         parsing_printf("[%s] Fixed point reached (%d funcs with unknown return status)\n",
722                        __FILE__,
723                        delayed_frames.frames.size());
724
725         // Mark UNSET functions in cycle as NORETURN
726         // except if we're doing non-recursive parsing.
727         // If we're just parsing one function, we want
728         // to mark everything RETURN instead.
729         for (auto iter = delayed_frames.frames.begin(); iter != delayed_frames.frames.end(); ++iter) {
730             Function * func = iter->first;
731             if (func->retstatus() == UNSET) {
732                 if(recursive)
733                 {
734                     func->set_retstatus(NORETURN);
735                     func->obj()->cs()->incrementCounter(PARSE_NORETURN_HEURISTIC);
736                 }
737                 else
738                 {
739                     func->set_retstatus(RETURN);
740                 }
741                 updated.push_back(func);
742             }
743
744             set<ParseFrame *> vec = iter->second;
745             for (auto vIter = vec.begin(); vIter != vec.end(); ++vIter) {
746                 Function * delayed = (*vIter)->func;
747                 if (delayed->retstatus() == UNSET) {
748                     delayed->set_retstatus(NORETURN);
749                     delayed->obj()->cs()->incrementCounter(PARSE_NORETURN_HEURISTIC);
750                     updated.push_back(delayed);  // resume frames waiting on the function just updated
751                 }
752             }
753         }
754
755     }
756
757     // We should have updated the return status of one or more frames; recurse
758     if (updated.size()) {
759         for (auto uIter = updated.begin();
760              uIter != updated.end();
761              ++uIter) {
762             resumeFrames((*uIter), work);
763         }
764
765         if (work.peek()) {
766             parsing_printf("[%s] Updated retstatus of delayed frames, trying again...\n", __FILE__);
767             parse_frames(work, recursive);
768         }
769     } else {
770         // We shouldn't get here
771         parsing_printf("[%s] No more work can be done (%d funcs with unknown return status)\n",
772                        __FILE__,
773                        delayed_frames.frames.size());
774         assert(0);
775     }
776 }
777
778 void Parser::cleanup_frames()  {
779   vector <ParseFrame *> pfv;
780   std::copy(frames.begin(), frames.end(), std::back_inserter(pfv));
781 #if USE_OPENMP
782 #pragma omp parallel for schedule(auto)
783   for (unsigned int i = 0; i < pfv.size(); i++) {
784     ParseFrame *pf = pfv[i];
785     if (pf) {
786       _parse_data->remove_frame(pf);
787       delete pf;
788     }
789   }
790   frames.clear();
791 #elif USE_CILK
792   cilk_for(unsigned i=0; i < pfv.size(); ++i) {
793     ParseFrame *pf = pfv[i];
794     if (pf) {
795       _parse_data->remove_frame(pf);
796       delete pf;
797     }
798   }
799 #else
800   for(unsigned i=0; i < pfv.size(); ++i) {
801     ParseFrame *pf = pfv[i];
802     if (pf) {
803       _parse_data->remove_frame(pf);
804       delete pf;
805     }
806   }
807 #endif
808   frames.clear();
809 }
810
811 /* Finalizing all functions for consumption:
812   
813    - Finish delayed parsing
814    - Prepare and record FuncExtents for range-based lookup
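   - Insert each function's blocks into the region's blocksByRange lookup
     (construction of blocksByRange is deferred to finalization)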
815 */
816
817 void
818 Parser::finalize(Function *f)
819 {
820     if(f->_cache_valid) {
821         return;
822     }
823
824     if(!f->_parsed) {
825         parsing_printf("[%s:%d] Parser::finalize(f[%lx]) "
826                                "forced parsing\n",
827                        FILE__,__LINE__,f->addr());
828         parse_at(f->region(), f->addr(), true, f->src());
829     }
830
831     bool cache_value = true;
832     boost::unique_lock<boost::mutex> finalize_write_lock(finalize_mutex);
833     /* this is commented out to prevent a failure in tampersStack, but
834            this may be an incorrect approach to fixing the problem.
835     if(frame_status(f->region(), f->addr()) < ParseFrame::PARSED) {
836         // XXX prevent caching of blocks, extents for functions that
837         // are actively being parsed. This prevents callbacks and other
838         // functions called from within, e.g. parse_frame from setting
839         // the caching flag and preventing later updates to the blocks()
840         // vector during finalization.
841         cache_value = false;
842     }*/
843
844     parsing_printf("[%s] finalizing %s (%lx)\n",
845                    FILE__,f->name().c_str(),f->addr());
846
847     region_data * rd = _parse_data->findRegion(f->region());
848     assert(rd);
849
850     // finish delayed parsing and sorting
851     Function::blocklist blocks = f->blocks_int();
852
853     // if we have finalized this function before, discard its stale extents
854     if (unlikely( !f->_extents.empty() )) {
855         _parse_data->remove_extents(f->_extents);
856         f->_extents.clear();
857     }
858
859
860     if(blocks.empty()) {
861         f->_cache_valid = cache_value; // see above
862         return;
863     }
864
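    // Walk the blocks in address order, adding each to the region's block range map and coalescing
    // contiguous runs of blocks into FuncExtents for range-based function lookup.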
865     auto bit = blocks.begin();
866     FuncExtent * ext = NULL;
867     Address ext_s = (*bit)->start();
868     Address ext_e = ext_s;
869
870     for( ; bit != blocks.end(); ++bit) {
871         Block * b = *bit;
872         rd->insertBlockByRange(b);
873         if(b->start() > ext_e) {
874             ext = new FuncExtent(f,ext_s,ext_e);
875
876             // remove access history for ext before publishing it
877             // to concurrent readers to avoid false race reports.
878             // ext is written before it is published and only read
879             // thereafter.
880             race_detector_forget_access_history(ext, sizeof(*ext));
881
882             parsing_printf("%lx extent [%lx,%lx)\n",f->addr(),ext_s,ext_e);
883             f->_extents.push_back(ext);
884             rd->funcsByRange.insert(ext);
885             ext_s = b->start();
886         }
887         ext_e = b->end();
888     }
889     ext = new FuncExtent(f,ext_s,ext_e);
890
891     // remove access history for ext before publishing it
892     // to concurrent readers to avoid false race reports.
893     // ext is written before it is published and only read
894     // thereafter.
895     race_detector_forget_access_history(ext, sizeof(*ext));
896
897     parsing_printf("%lx extent [%lx,%lx)\n",f->addr(),ext_s,ext_e);
898     rd->funcsByRange.insert(ext);
899     f->_extents.push_back(ext);
900
901     f->_cache_valid = cache_value; // see comment at function entry
902
903     if (unlikely( f->obj()->defensiveMode())) {
904         // add fallthrough edges for calls assumed not to be returning
905         // whose fallthrough blocks we parsed anyway (this happens if 
906         // the FT block is also a branch target)
907         Function::edgelist & edges = f->_call_edge_list;
908         for (Function::edgelist::iterator eit = edges.begin();
909              eit != edges.end();
910              eit++)
911         {
912             boost::lock_guard<Block> g(*(*eit)->src());
913             if (2 > (*eit)->src()->targets().size()) {
914                 Block *ft = _parse_data->findBlock((*eit)->src()->region(),
915                                                    (*eit)->src()->end());
916                 if (ft && HASHDEF(f->_bmap,ft->start())) {
917                     link((*eit)->src(),ft,CALL_FT,false);
918                 }
919             }
920         }
921     }
922 }
923
924 void
925 Parser::finalize()
926 {
927     if(_parse_state < FINALIZED) {
928         finalize_funcs(hint_funcs);
929         finalize_funcs(discover_funcs);
930         _parse_state = FINALIZED;
931     }
932 }
933
934 void
935 Parser::finalize_funcs(vector<Function *> &funcs)
936 {
937     vector<Function *>::iterator fit = funcs.begin();
938     for( ; fit != funcs.end(); ++fit) {
939         finalize(*fit);
940     }
941 }
942
943 void
944 Parser::record_func(Function *f)
945 {
946     if(!f) return;
947
948     if(f->src() == HINT)
949         hint_funcs.push_back(f);
950     else
951         discover_funcs.push_back(f);
952
953     sorted_funcs.insert(f);
954
955     _parse_data->record_func(f);
956 }
957
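// Initialize a frame for parsing: find or create the function's entry block, push the seed work
// element, and record stack-frame properties (preamble, FP save) from the entry instruction.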
958 void
959 Parser::init_frame(ParseFrame & frame)
960 {
961     boost::lock_guard<ParseFrame> g(frame);
962     Block * b = NULL;
963     Block * split = NULL;
964
965     if ( ! frame.func->_entry )
966     {
967         // Find or create a block
968         b = block_at(frame, frame.func, frame.func->addr(),split);
969         if(b) {
970             frame.leadersToBlock[frame.func->addr()] = b;
971             frame.func->_entry = b;
972             frame.seed = new ParseWorkElem(NULL,NULL,frame.func->addr(),true,false);
973             frame.pushWork(frame.seed);
974         } else {
975             parsing_printf("[%s] failed to initialize parsing frame\n",
976                            FILE__);
977             return;
978         }
979         if (split) {
980             _pcb.splitBlock(split,b);
981         }
982     }
983
984     // FIXME these operations should move into the actual parsing
985     Address ia_start = frame.func->addr();
986     unsigned size =
987             frame.codereg->offset() + frame.codereg->length() - ia_start;
988     const unsigned char* bufferBegin =
989             (const unsigned char *)(frame.func->isrc()->getPtrToInstruction(ia_start));
990     InstructionDecoder dec(bufferBegin,size,frame.codereg->getArch());
991     InstructionAdapter_t* ah = InstructionAdapter_t::makePlatformIA_IAPI(obj().cs()->getArch(),
992                                                                          dec, ia_start, frame.func->obj(),
993                                                                          frame.codereg, frame.func->isrc(), b);
994     if(ah->isStackFramePreamble()) {
995         frame.func->_no_stack_frame = false;
996     }
997     frame.func->_saves_fp = ah->savesFP();
998     delete ah;
999 }
1000
1001 void ParseFrame::cleanup()
1002 {
1003     boost::lock_guard<ParseFrame> g(*this);
1004     for(unsigned i=0;i<work_bundles.size();++i)
1005         delete work_bundles[i];
1006     work_bundles.clear();
1007     if(seed)
1008         delete seed;
1009     seed = NULL;
1010 }
1011
1012 namespace {
1013     inline Edge * bundle_call_edge(ParseWorkBundle * b)
1014     {
1015         if(!b) return NULL;
1016
1017         vector<ParseWorkElem*> const& elems = b->elems();
1018         vector<ParseWorkElem*>::const_iterator it = elems.begin();
1019         for( ; it != elems.end(); ++it) {
1020             if((*it)->edge()->type() == CALL)
1021                 return (*it)->edge();
1022         }
1023         return NULL;
1024     }
1025
1026     /* 
1027      * Look up the next block for detection of straight-line
1028      * fallthrough edges into existing blocks.
1029      */
1030 }
1031
1032 void
1033 Parser::parse_frame(ParseFrame & frame, bool recursive) {
1034     /** Persistent intermediate state **/
1035     InstructionAdapter_t *ahPtr = NULL;
1036     ParseFrame::worklist_t & worklist = frame.worklist;
1037     map<Address, Block *> & leadersToBlock = frame.leadersToBlock;
1038     Address & curAddr = frame.curAddr;
1039     Function * func = frame.func;
1040     dyn_hash_map<Address, bool> & visited = frame.visited;
1041     unsigned & num_insns = frame.num_insns;
1042     func->_cache_valid = false;
1043
1044     /** Non-persistent intermediate state **/
1045     Address nextBlockAddr;
1046     Block * nextBlock;
1047
1048     if (frame.status() == ParseFrame::UNPARSED) {
1049         parsing_printf("[%s] ==== starting to parse frame %lx ====\n",
1050                        FILE__,frame.func->addr());
1051         // prevents recursion of parsing
1052         frame.func->_parsed = true;
1053     } else {
1054         parsing_printf("[%s] ==== resuming parse of frame %lx ====\n",
1055                        FILE__,frame.func->addr());
1056         // Pull work that can be resumed off the delayedWork list
1057         std::map<ParseWorkElem *, Function *>::iterator iter;
1058
1059         vector<ParseWorkElem *> updated;
1060         vector<ParseWorkElem *>::iterator uIter;
1061
1062         for (iter = frame.delayedWork.begin();
1063              iter != frame.delayedWork.end();
1064              ++iter) {
1065             if (iter->second->retstatus() != UNSET) {
1066                 frame.pushWork(iter->first);
1067                 updated.push_back(iter->first);
1068             }
1069         }
1070         for (uIter = updated.begin(); uIter != updated.end(); ++uIter) {
1071             frame.delayedWork.erase(*uIter);
1072         }
1073     }
1074
1075     frame.set_status(ParseFrame::PROGRESS);
1076
1077     while(!worklist.empty()) {
1078
1079         Block * cur = NULL;
1080         ParseWorkElem * work = frame.popWork();
1081         if (work->order() == ParseWorkElem::call) {
1082             parsing_printf("Handling call work element\n");
1083             Function * ct = NULL;
1084             
1085             // If we're not doing recursive traversal, skip *all* of the call edge processing.
1086             // We don't want to create a stub. We'll handle the assumption of a fallthrough
1087             // target for the fallthrough work element, not the call work element. Catch blocks
1088             // will get excluded but that's okay; our heuristic is not reliable enough that I'm willing to deploy
1089             // it when we'd only be detecting a non-returning callee by name anyway.
1090             // --BW 12/2012
1091             if (!recursive) {
1092                 parsing_printf("[%s] non-recursive parse skipping call %lx->%lx\n",
1093                                FILE__, work->edge()->src()->lastInsnAddr(), work->target());
1094                 continue;
1095             }
1096             
1097             FuncSource how = RT;
1098             if(frame.func->_src == GAP || frame.func->_src == GAPRT)
1099                 how = GAPRT;
1100
1101             ct = _parse_data->createAndRecordFunc(frame.codereg, work->target(), how);
1102             if (ct) {
1103                 _pcb.discover_function(ct);
1104             } else {
1105                 ct = _parse_data->findFunc(frame.codereg, work->target());
1106             }
1107             bool frame_parsing_not_started = 
1108                 (frame_status(ct->region(),ct->addr())==ParseFrame::UNPARSED ||
1109                  frame_status(ct->region(),ct->addr())==ParseFrame::BAD_LOOKUP);
1110             parsing_printf("\tframe %lx, UNPARSED: %d, BAD_LOOKUP %d\n", ct->addr(), frame_status(ct->region(),ct->addr())==ParseFrame::UNPARSED,frame_status(ct->region(),ct->addr())==ParseFrame::BAD_LOOKUP);
1111
1112
1113             if (!frame_parsing_not_started && !work->callproc()) {
1114                 parsing_printf("[%s] binding call (call target should have been parsed) %lx->%lx\n",
1115                         FILE__,work->edge()->src()->lastInsnAddr(),work->target());
1116                 Function *tfunc = _parse_data->findFunc(frame.codereg,work->target());
1117                 pair<Function*,Edge*> ctp =
1118                         bind_call(frame,
1119                                   work->target(),
1120                                   work->edge()->src(),
1121                                   work->edge());
1122                 work->mark_call();
1123             }
1124
1125             if (recursive && ct && frame_parsing_not_started) {
1126                 // suspend this frame and parse the next
1127                 parsing_printf("    [suspend frame %lx]\n", func->addr());
1128                 frame.call_target = ct;
1129                 frame.set_status(ParseFrame::CALL_BLOCKED);
1130                 // need to re-visit this edge
1131                 frame.pushWork(work);
1132                 if (ahPtr) delete ahPtr;
1133                 return;
1134             } else if (ct && work->tailcall()) {
1135                 // XXX The target has been or is currently being parsed (else
1136                 //     the previous conditional would have been taken),
1137                 //     so if its return status is unset then this
1138                 //     function has to take UNKNOWN
1139                 if (func->retstatus() != RETURN) {
1140                     if (ct->retstatus() > NORETURN)
1141                       func->set_retstatus(ct->retstatus());
1142                     else if (ct->retstatus() == UNSET)
1143                       frame.pushDelayedWork(work, ct);
1144                 }
1145             }
1146
1147             continue;
1148         } else if (work->order() == ParseWorkElem::call_fallthrough) {
1149             // check associated call edge's return status
1150             Edge * ce = bundle_call_edge(work->bundle());
1151             if (!ce) {
1152                 // odd; no call edge in this bundle
1153                 parsing_printf("[%s] unexpected missing call edge at %lx\n",
1154                                FILE__,work->edge()->src()->lastInsnAddr());
1155             } else {
1156                 // check for system call FT
1157                 Edge* edge = work->edge();
1158                 Block::Insns blockInsns;
1159 //                boost::lock_guard<Block> src_guard(*edge->src());
1160                 edge->src()->getInsns(blockInsns);
1161                 auto prev = blockInsns.rbegin();
1162                 InstructionAPI::Instruction prevInsn = prev->second;
1163                 bool is_nonret = false;
1164
1165                 if (prevInsn.getOperation().getID() == e_syscall) {
1166                     Address src = edge->src()->lastInsnAddr();
1167
1168
1169                     // Need to determine if system call is non-returning
1170                     long int syscallNumber;
1171                     if (!getSyscallNumber(func, edge->src(), src, frame.codereg->getArch(), syscallNumber)) {
1172                         // If we cannot retrieve a syscall number, assume the syscall returns
1173                         parsing_printf("[%s] could not retrieve syscall edge, assuming returns\n", FILE__);
1174                     } else {
1175                         if (obj().cs()->nonReturningSyscall(syscallNumber)) {
1176                             is_nonret = true;
1177                         }
1178                     }
1179
1180                     if (is_nonret) {
1181                         parsing_printf("[%s] no fallthrough for non-returning syscall at %lx\n",
1182                                        FILE__,
1183                                        work->edge()->src()->lastInsnAddr());
1184
1185                         // unlink tempsink fallthrough edge
1186                         Edge * remove = work->edge();
1187                         remove->src()->removeTarget(remove);
1188                         factory().destroy_edge(remove, destroyed_noreturn);
1189                         continue;
1190                     }
1191                 } else if (ce->trg()) {
1192                     Address target = ce->trg_addr();
1193                     Function * ct = _parse_data->findFunc(frame.codereg,target);
1194                     bool is_plt = false;
1195
1196                     // check if associated call edge's return status is still unknown
1197                     if (ct && (ct->retstatus() == UNSET) ) {
1198                         // Delay parsing until we've finished the corresponding call edge
1199                         parsing_printf("[%s] Parsing FT edge %lx, corresponding callee (%s) return status unknown; delaying work\n",
1200                                        __FILE__,
1201                                        work->edge()->src()->lastInsnAddr(),
1202                                        ct->name().c_str());
1203
1204                         // Add work to ParseFrame's delayed list
1205                         frame.pushDelayedWork(work, ct);
1206
1207                         // Continue other work for this frame
1208                         continue;
1209                     }
1210
1211                     is_plt = HASHDEF(plt_entries,target);
1212
1213                     // CodeSource-defined tests 
1214                     is_nonret = obj().cs()->nonReturning(target);
1215                     if (is_nonret) {
1216                         parsing_printf("\t Disallowing FT edge: CodeSource reports nonreturning\n");
1217                     }
1218                     if (!is_nonret && is_plt) {
1219                         is_nonret |= obj().cs()->nonReturning(plt_entries[target]);
1220                         if (is_nonret) {
1221                             parsing_printf("\t Disallowing FT edge: CodeSource reports PLT nonreturning\n");
1222                         }
1223                     }
1224                     // Parsed return status tests
1225                     if (!is_nonret && !is_plt && ct) {
1226                         is_nonret |= (ct->retstatus() == NORETURN);
1227                         if (is_nonret) {
1228                             parsing_printf("\t Disallowing FT edge: function is non-returning\n");
1229                         }
1230                     }
1231                     // Call-stack tampering tests
1232                     if (unlikely(!is_nonret && frame.func->obj()->defensiveMode() && ct)) {
1233                         is_nonret |= (ct->retstatus() == UNKNOWN);
1234                         if (is_nonret) {
1235                             parsing_printf("\t Disallowing FT edge: function in "
1236                                                    "defensive binary may not return\n");
1237                             mal_printf("Disallowing FT edge: function %lx in "
1238                                                "defensive binary may not return\n", ct->addr());
1239                         } else {
1240                             StackTamper ct_tamper = ct->tampersStack();
1241                             is_nonret |= (TAMPER_NONZERO == ct_tamper);
1242                             is_nonret |= (TAMPER_ABS == ct_tamper);
1243                             if (is_nonret) {
1244                                 mal_printf("Disallowing FT edge: function at %lx "
1245                                                    "tampers with its stack\n", ct->addr());
1246                                 parsing_printf("\t Disallowing FT edge: function "
1247                                                        "tampers with its stack\n");
1248                             }
1249                         }
1250                     }
1251                     if (is_nonret) {
1252                         parsing_printf("[%s] no fallthrough for non-returning call "
1253                                                "to %lx at %lx\n",FILE__,target,
1254                                        work->edge()->src()->lastInsnAddr());
1255
1256                         // unlink tempsink fallthrough edge
1257                         Edge * remove = work->edge();
1258                         remove->src()->removeTarget(remove);
1259                         factory().destroy_edge(remove, destroyed_noreturn);
1260                         continue;
1261                     } else
1262                         // Invalidate cache_valid for all sharing functions
1263                         invalidateContainingFuncs(func, ce->src());
1264                 }
1265             }
1266         } else if (work->order() == ParseWorkElem::seed_addr) {
1267             cur = leadersToBlock[work->target()];
1268         } else if (work->order() == ParseWorkElem::resolve_jump_table) {
1269             // resume to resolve jump table
1270             auto work_ah = work->ah();
1271             parsing_printf("... continue parse indirect jump at %lx\n", work_ah->getAddr());
1272             Block *nextBlock = work->cur();
1273             if (nextBlock->last() != work_ah->getAddr()) {
1274                 // The block has been split
1275                 region_data * rd = _parse_data->findRegion(frame.codereg);
1276                 set<Block*> blocks;
1277                 rd->blocksByRange.find(work_ah->getAddr(), blocks);
1278                 for (auto bit = blocks.begin(); bit != blocks.end(); ++bit) {
1279                     if ((*bit)->last() == work_ah->getAddr()) {
1280                         nextBlock = (*bit);
1281                         break;
1282                     }
1283                 }
1284
1285             }
1286             ProcessCFInsn(frame,nextBlock,work->ah());
1287             continue;
1288         }
1289             // call fallthrough case where we have already checked that
1290             // the target returns. this is used in defensive mode.
1291         else if (work->order() == ParseWorkElem::checked_call_ft) {
1292             Edge* ce = bundle_call_edge(work->bundle());
1293             if (ce != NULL) {
1294                 invalidateContainingFuncs(func, ce->src());
1295             } else {
1296                 parsing_printf("[%s] unexpected missing call edge at %lx\n",
1297                                FILE__,work->edge()->src()->lastInsnAddr());
1298             }
1299         }
1300
1301         if (NULL == cur) {
1302             pair<Block*,Edge*> newedge =
1303                     add_edge(frame,
1304                              frame.func,
1305                              work->edge()->src(),
1306                              work->target(),
1307                              work->edge()->type(),
1308                              work->edge());
1309             cur = newedge.first;
1310         }
1311
1312         if (HASHDEF(visited,cur->start()))
1313         {
1314             parsing_printf("[%s] skipping locally parsed target at %lx\n",
1315                            FILE__,work->target());
1316             continue;
1317         }
1318         visited[cur->start()] = true;
1319         leadersToBlock[cur->start()] = cur;
1320
1321         if (!cur->_parsed)
1322         {
1323             parsing_printf("[%s] parsing block %lx\n",
1324                            FILE__,cur->start());
1325             if (frame.func->obj()->defensiveMode()) {
1326                 mal_printf("new block at %lx (0x%lx)\n",cur->start(), cur);
1327             }
1328
1329             cur->_parsed = true;
1330             curAddr = cur->start();
1331         } else {
1332             parsing_printf("[%s] deferring parse of shared block %lx\n",
1333                            FILE__,cur->start());
1334             if (func->retstatus() < UNKNOWN) {
1335                 // we've parsed into another function; if we've parsed
1336                 // into its entry point, set retstatus to match it
1337                 Function * other_func = _parse_data->findFunc(
1338                         func->region(), cur->start());
1339                 if (other_func && other_func->retstatus() > UNKNOWN) {
1340                     func->set_retstatus(other_func->retstatus());
1341                 } else {
1342                     func->set_retstatus(UNKNOWN);
1343                 }
1344             }
1345             // The edge to this shared block is changed from
1346             // "going to sink" to going to this shared block.
1347             // This changes the function boundary, so we need to
1348             // invalidate the cache.
1349             func->_cache_valid = false;
1350             continue;
1351         }
1352
1353         /*
1354          * External consumers of parsing may have invoked finalizing
1355          * methods from callback routines. Ensure that the caches
1356          * are invalidated because a new block is being added to the
1357          * function's view
1358          */
1359         func->_cache_valid = false;
1360
1361         // NB Using block's region() here because it may differ from the
1362         //    function's if control flow has jumped to another region
1363         unsigned size =
1364                 cur->region()->offset() + cur->region()->length() - curAddr;
1365         const unsigned char* bufferBegin =
1366                 (const unsigned char *)(func->isrc()->getPtrToInstruction(curAddr));
1367         InstructionDecoder dec(bufferBegin,size,frame.codereg->getArch());
1368
1369         if (!ahPtr)
1370             ahPtr = InstructionAdapter_t::makePlatformIA_IAPI(func->obj()->cs()->getArch(), dec, curAddr, func->obj(),
1371                                                               cur->region(), func->isrc(), cur);
1372         else
1373             ahPtr->reset(dec,curAddr,func->obj(),
1374                          cur->region(), func->isrc(), cur);
1375
1376         InstructionAdapter_t * ah = ahPtr;
1377         
1378         nextBlockAddr = std::numeric_limits<Address>::max();
1379         auto nextBlockIter = frame.leadersToBlock.upper_bound(frame.curAddr);
1380         if (nextBlockIter != frame.leadersToBlock.end()) {
1381             nextBlockAddr = nextBlockIter->first;
1382             nextBlock = nextBlockIter->second;
1383         }
1384         bool isNopBlock = ah->isNop();
1385
1386         while(true) {
1387             curAddr = ah->getAddr();
1388             /** Check for straight-line fallthrough **/
1389             if (curAddr == nextBlockAddr) {
1390                 parsing_printf("[%s] straight-line parse into block at %lx\n",
1391                                FILE__,curAddr);
1392                 if (frame.func->obj()->defensiveMode()) {
1393                     mal_printf("straight-line parse into block at %lx\n",curAddr);
1394                 }
1395                 ah->retreat();
1396                 curAddr = ah->getAddr();
1397
1398                 end_block(cur,ahPtr);
1399                 pair<Block*,Edge*> newedge =
1400                         add_edge(frame,frame.func,cur,
1401                                  nextBlockAddr,FALLTHROUGH,NULL);
1402
1403                 if (!HASHDEF(visited,nextBlockAddr) &&
1404                     !HASHDEF(leadersToBlock,nextBlockAddr)) {
1405                     parsing_printf("[%s:%d] pushing %lx onto worklist\n",
1406                                    FILE__,__LINE__,nextBlockAddr);
1407                     frame.pushWork(
1408                             frame.mkWork(
1409                                     NULL,
1410                                     newedge.second,
1411                                     nextBlockAddr,
1412                                     true,
1413                                     false)
1414                     );
1415                     /* preserved as example of leaky code; use mkWork instead
1416                     frame.pushWork(
1417                         new ParseWorkElem(
1418                             NULL,
1419                             newedge.second,
1420                             nextBlockAddr,
1421                             true,
1422                             false)
1423                         );
1424                     */
1425                     leadersToBlock[nextBlockAddr] = nextBlock;
1426                 }
1427                 break;
1428             } else if (curAddr > nextBlockAddr) {
1429                 parsing_printf("[%s:%d] inconsistent instruction stream: "
1430                                        "%lx is within [%lx,%lx)\n",
1431                                FILE__,__LINE__,curAddr,
1432                                nextBlock->start(),nextBlock->end());
1433                 Address prev_insn;
1434                 if (nextBlock->consistent(curAddr, prev_insn)) {
1435                     // The two overlapping blocks are aligned.
1436                     // We need to split the larger block and create a new edge to the later block.
1437                     Block* new_block = split_block(frame.func, nextBlock, curAddr, prev_insn);
1438                     leadersToBlock[curAddr] = new_block;
1439                     visited[curAddr] = true;
1440                     ah->retreat();
1441                     end_block(cur, ah);
1442
1443                     add_edge(frame, frame.func, cur, curAddr, FALLTHROUGH, NULL);
1444
1445
1446
1447                     // We break out of this loop because no more straight-line parsing is needed
1448                     break;
1449                 }
1450
1451                 // NB "cur" hasn't ended, so its range may
1452                 // not look like it overlaps with nextBlock
1453                 _pcb.overlapping_blocks(cur,nextBlock);
1454
1455                 tie(nextBlockAddr,nextBlock) =
1456                         func->get_next_block(frame.curAddr, frame.codereg);
1457             }
1458
1459             // per-instruction callback notification
1460             ParseCallback::insn_details insn_det;
1461             insn_det.insn = ah;
1462
1463             parsing_printf("[%s:%d] curAddr 0x%lx: %s \n",
1464                            FILE__,__LINE__,curAddr, insn_det.insn->getInstruction().format().c_str() );
1465
1466             if (func->_is_leaf_function) {
1467                 Address ret_addr;
1468                 func->_is_leaf_function = !(insn_det.insn->isReturnAddrSave(ret_addr));
1469                 parsing_printf("[%s:%d] leaf %d funcname %s \n",
1470                                FILE__,__LINE__,func->_is_leaf_function, func->name().c_str());
1471                 if (!func->_is_leaf_function) func->_ret_addr = ret_addr;
1472             }
1473
1474             _pcb.instruction_cb(func,cur,curAddr,&insn_det);
1475
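            // If this block started as a run of nops, end it at the first
            // non-nop instruction and fall through into a fresh block there.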
1476             if (isNopBlock && !ah->isNop()) {
1477                 ah->retreat();
1478
1479                 end_block(cur,ahPtr);
1480                 pair<Block*,Edge*> newedge =
1481                         add_edge(frame,frame.func,cur,curAddr,FALLTHROUGH,NULL);
1482                 Block * targ = newedge.first;
1483
1484                 parsing_printf("[%s:%d] nop-block ended at %lx\n",
1485                                FILE__,__LINE__,curAddr);
1486                 if (targ && !HASHDEF(visited,targ->start())) {
1487                     parsing_printf("[%s:%d] pushing %lx onto worklist\n",
1488                                    FILE__,__LINE__,targ->start());
1489                     frame.pushWork(
1490                             frame.mkWork(
1491                                     NULL,
1492                                     newedge.second,
1493                                     targ->start(),
1494                                     true,
1495                                     false)
1496                     );
1497                     leadersToBlock[targ->start()] = targ;
1498                 }
1499                 break;
1500             }
1501
1502             /** Particular instruction handling (calls, branches, etc) **/
1503             ++num_insns;
1504
1505             if(ah->hasCFT()) {
1506                 if (ah->isIndirectJump()) {
1507                     // Create a work element to represent that
1508                     // we will resolve the jump table later
1509                     end_block(cur,ahPtr);
1510                     frame.pushWork( frame.mkWork( work->bundle(), cur, ahPtr));
1511                 } else {
1512                     ProcessCFInsn(frame,cur,ahPtr);
1513                 }
1514                 break;
1515             } else if (func->_saves_fp &&
1516                        func->_no_stack_frame &&
1517                        ah->isFrameSetupInsn()) { // isFrameSetupInsn is expensive
1518                 func->_no_stack_frame = false;
1519             } else if (ah->isLeave()) {
1520                 func->_no_stack_frame = false;
1521             } else if ( ah->isAbort() ) {
1522                 // 4. `abort-causing' instructions
1523                 end_block(cur,ahPtr);
1524                 //link(cur, sink_block, DIRECT, true);
1525                 break;
1526             } else if ( ah->isInvalidInsn() ) {
1527                 // 4. Invalid or `abort-causing' instructions
1528                 end_block(cur,ahPtr);
1529                 link(cur, _sink, DIRECT, true);
1530                 break;
1531             } else if ( ah->isInterruptOrSyscall() ) {
1532                 // 5. Raising instructions
1533                 end_block(cur,ahPtr);
1534
1535                 pair<Block*,Edge*> newedge =
1536                         add_edge(frame,frame.func,cur,ah->getNextAddr(),FALLTHROUGH,NULL);
1537                 Block * targ = newedge.first;
1538
1539                 if (targ && !HASHDEF(visited,targ->start()) &&
1540                     !HASHDEF(leadersToBlock,targ->start())) {
1541                     parsing_printf("[%s:%d] pushing %lx onto worklist\n",
1542                                    FILE__,__LINE__,targ->start());
1543                     frame.pushWork(
1544                             frame.mkWork(
1545                                     NULL,
1546                                     newedge.second,
1547                                     targ->start(),
1548                                     true,
1549                                     false)
1550                     );
1551                     leadersToBlock[targ->start()] = targ;
1552                 }
1553                 if (unlikely(func->obj()->defensiveMode())) {
1554                     fprintf(stderr,"parsed bluepill insn sysenter or syscall "
1555                             "in defensive mode at %lx\n",curAddr);
1556                 }
1557                 break;
1558             } else if (unlikely(func->obj()->defensiveMode())) {
1559                 if (!_pcb.hasWeirdInsns(func) && ah->isGarbageInsn()) {
1560                     // add instrumentation at this addr so we can
1561                     // extend the function if this really executes
1562                     ParseCallback::default_details det(
1563                             (unsigned char*) cur->region()->getPtrToInstruction(cur->lastInsnAddr()),
1564                             cur->end() - cur->lastInsnAddr(),
1565                             true);
1566                     _pcb.abruptEnd_cf(cur->lastInsnAddr(),cur,&det);
1567                     _pcb.foundWeirdInsns(func);
1568                     end_block(cur,ahPtr);
1569                     // allow invalid instructions to end up as a sink node.
1570                     link(cur, _sink, DIRECT, true);
1571                     break;
1572                 } else if (ah->isNopJump()) {
1573                     // patch the jump to make it a nop, and re-set the 
1574                     // instruction adapter so we parse the instruction
1575                     // as a no-op this time, allowing the subsequent
1576                     // instruction to be parsed correctly
1577                     mal_printf("Nop jump at %lx, changing it to nop\n",ah->getAddr());
1578                     _pcb.patch_nop_jump(ah->getAddr());
1579                     unsigned bufsize =
1580                             func->region()->offset() + func->region()->length() - ah->getAddr();
1582                     const unsigned char* bufferBegin = (const unsigned char *)
1583                             (func->isrc()->getPtrToInstruction(ah->getAddr()));
1584                     dec = InstructionDecoder
1585                             (bufferBegin, bufsize, frame.codereg->getArch());
1586                     ah->reset(dec, curAddr, func->obj(),
1587                               func->region(), func->isrc(), cur);
1588                 } else {
1589                     entryID id = ah->getInstruction().getOperation().getID();
1590                     switch (id) {
1591                         case e_rdtsc:
1592                             fprintf(stderr,"parsed bluepill insn rdtsc at %lx\n",curAddr);
1593                             break;
1594                         case e_sldt:
1595                             fprintf(stderr,"parsed bluepill insn sldt at %lx\n",curAddr);
1596                             break;
1597                         default:
1598                             break;
1599                     }
1600                 }
1601             } else {
1602                 // default
1603             }
1604
1605             /** Check for overruns of valid address space **/
1606             if (!is_code(func,ah->getNextAddr())) {
1607                 parsing_printf("[%s] next insn %lx is invalid\n",
1608                                FILE__,ah->getNextAddr());
1609
1610                 end_block(cur,ahPtr);
1611                 // We need to tag the block with a sink edge
1612                 link(cur, _sink, DIRECT, true);
1613                 break;
1614             } else if (!cur->region()->contains(ah->getNextAddr())) {
1615                 parsing_printf("[%s] next address %lx is outside [%lx,%lx)\n",
1616                                FILE__,ah->getNextAddr(),
1617                                cur->region()->offset(),
1618                                cur->region()->offset()+cur->region()->length());
1619                 end_block(cur,ahPtr);
1620                 // We need to tag the block with a sink edge
1621                 link(cur, _sink, DIRECT, true);
1622                 break;
1623             }
1624             ah->advance();
1625         }
1626     }
1627     if (ahPtr) delete ahPtr;
1628     // Check if parsing is complete
1629     if (!frame.delayedWork.empty()) {
1630         frame.set_status(ParseFrame::FRAME_DELAYED);
1631         return;
1632     }
1633
1634     /** parsing complete **/
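    // Functions at PLT entry addresses take their return status from the
    // CodeSource's non-returning list (keyed by PLT name) and adopt that name;
    // any other function still UNSET at this point is marked NORETURN.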
1635     if (HASHDEF(plt_entries,frame.func->addr())) {
1636 //        if (obj().cs()->nonReturning(frame.func->addr())) {
1637         if (obj().cs()->nonReturning(plt_entries[frame.func->addr()])) {
1638             frame.func->set_retstatus(NORETURN);
1639         } else {
1640             frame.func->set_retstatus(UNKNOWN);
1641         }
1642
1643         // Convenience -- adopt PLT name
1644         frame.func->_name = plt_entries[frame.func->addr()];
1645     } else if (frame.func->retstatus() == UNSET) {
1646         frame.func->set_retstatus(NORETURN);
1647     }
1648
1649     frame.set_status(ParseFrame::PARSED);
1650
1651     if (unlikely(obj().defensiveMode())) {
1652         // calculate this after setting the function to PARSED, so that when
1653         // we finalize the function we'll actually save the results and won't
1654         // re-finalize it
1655         func->tampersStack();
1656     }
1657     _pcb.newfunction_retstatus( func );
1658 }
1659
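// Close out a block at the instruction adapter's current position: the block
// ends just past that instruction, which is recorded as the block's last.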
1660 void
1661 Parser::end_block(Block * b, InstructionAdapter_t * ah)
1662 {
1663     b->updateEnd(ah->getNextAddr());
1664     b->_lastInsn = ah->getAddr();
1665 }
1666
1667 Block*
1668 Parser::record_block(Block *b)
1669 {
1670     parsing_printf("[%s:%d] recording block [%lx,%lx)\n",
1671                    FILE__,__LINE__,b->start(),b->end());
1672     return _parse_data->record_block(b->region(),b);
1673 }
1674
1675
1676 // block_at should only be called when
1677 // we know that we want to create a block within the function.
1678 // So, we should not call this function to create the entry block
1679 // of a callee function.
1680 Block *
1681 Parser::block_at(ParseFrame &frame,
1682         Function * owner,
1683         Address addr,
1684         Block * & split)
1685 {
1686 //    ScopeLock<Mutex<true> > l(work_mutex);
1687
1688     Block * ret = NULL;
1692
1693     split = NULL;
1694     CodeRegion *cr;
1695     if(owner->region()->contains(addr))
1696         cr = owner->region();
1697     else
1698         cr = _parse_data->reglookup(owner->region(),addr);
1699
1700
1701     if(!is_code(owner,addr)) {
1702         parsing_printf("[%s] block address %lx rejected by isCode()\n",
1703                        FILE__,addr);
1704         return NULL;
1705     }
1706
1707
1708     {
1709 #ifdef ENABLE_RACE_DETECTION
1710     // this lock causes deadlock when running in parallel, but it is
1711     // useful for suppressing unimportant races on the iterator
1712     boost::lock_guard<Function> g(*owner);
1713 #endif
1714     // An already existing block
1715     auto iter = frame.leadersToBlock.find(addr);
1716     if (iter != frame.leadersToBlock.end()) {
1717         return iter->second;
1718     }
1719     // A block that may need to be split 
1720     iter = frame.leadersToBlock.upper_bound(addr);
1721     if (iter != frame.leadersToBlock.begin()) {
1722         --iter;
1723     }
1724     if (iter != frame.leadersToBlock.end()) {
1725         Block* b = iter->second;
1726         Address prev_insn;
1727         if (b->consistent(addr, prev_insn)) {
1728             ret = split_block(owner, b, addr, prev_insn);
1729             split = b;
1730             frame.visited[ret->start()] = true;
1731             return ret;
1732         }
1733     }
1734     ret = _cfgfact._mkblock(owner, cr, addr);
1735     ret = record_block(ret);
1736     return ret;
1737     }
1738 }
1739
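// Find or create the block at dst within this frame and connect src to it.
// If an existing (temporary sink) edge is supplied it is retargeted via
// relink(); otherwise a fresh edge of type et is created.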
1740 pair<Block *,Edge *>
1741 Parser::add_edge(
1742         ParseFrame & frame,
1743         Function * owner,
1744         Block * src,
1745         Address dst,
1746         EdgeTypeEnum et,
1747         Edge * exist)
1748 {
1749     Block * split = NULL;
1750     Block * ret = NULL;
1751     Block * original_block = NULL;
1752     Edge * newedge = NULL;
1753     pair<Block *, Edge *> retpair((Block *) NULL, (Edge *) NULL);
1754
1755     if(!is_code(owner,dst)) {
1756         parsing_printf("[%s:%d] target address %lx rejected by isCode()\n",
1757             FILE__, __LINE__, dst);
1758         return retpair;
1759     }
1760
1761     ret = block_at(frame, owner,dst, split);
1762     retpair.first = ret;
1763
1764     if(split == src) {
1765         // special case -- same block
1766         src = ret;
1767     }
1768
1769     if(et != CALL && frame.leadersToBlock.find(ret->start()) == frame.leadersToBlock.end()) {
1770         // If we created a new block, and this is not a function call,
1771         // then the new block is part of the current function.
1772         // We need to record it as a block leader of this frame's function.
1773         frame.leadersToBlock[ret->start()] = ret;
1774     }
1775
1776     if(NULL == exist) {
1777         newedge = link(src,ret,et,false);
1778         retpair.second = newedge;
1779     } else {
1780         relink(exist,src,ret);
1781         retpair.second = exist;
1782     }
1783
1784     return retpair;
1785 }
1786
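// Split block b at addr: a new block covering [addr, b->end()) is created (or
// an existing one reused), b's out-edges are moved to it, and b is truncated
// at addr with a fallthrough edge into the new block. Functions that already
// contain b have their finalization caches invalidated.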
1787 Block *
1788 Parser::split_block(
1789         Function * owner,
1790         Block *b,
1791         Address addr,
1792         Address previnsn)
1793 {
1794     Block * ret;
1795     CodeRegion * cr;
1796     bool isRetBlock = false;
1797     if(owner->region()->contains(b->start()))
1798         cr = owner->region();
1799     else
1800         cr = _parse_data->reglookup(owner->region(),b->start());
1801     region_data * rd = _parse_data->findRegion(cr);
1802
1803     // enable for extra-safe testing, but callers are responsible
1804     // assert(b->consistent(addr, previnsn));
1805
1806     ret = factory()._mkblock(owner,cr,addr);
1807     Block * exist = record_block(ret);
1808     bool block_exist = false;
1809     if (exist != ret) {
1810         block_exist = true;
1811         ret = exist;
1812
1813     }
1814
1815     // move out edges
1816
1817     {
1818
1819         Block::edgelist &trgs = b->_trglist;
1820         Block::edgelist::iterator tit = trgs.begin();
1821         for (; tit != trgs.end(); ++tit) {
1822             Edge *e = *tit;
1823             e->_source = ret;
1824             ret->_trglist.push_back(e);
1825         }
1826         if (!trgs.empty() && RET == (*trgs.begin())->type()) {
1827             isRetBlock = true;
1828         }
1829         trgs.clear();
1830         if (!block_exist) {
1831             ret->updateEnd(b->end());
1832             ret->_lastInsn = b->_lastInsn;
1833             ret->_parsed = true;
1834         }
1835         b->updateEnd(addr);
1836         b->_lastInsn = previnsn;
1837         link(b,ret,FALLTHROUGH,false);
1838
1839         // Any functions holding b that have already been finalized
1840         // need to have their caches invalidated so that they will
1841         // find out that they have this new 'ret' block
1842         std::set<Function*> prev_owners;
1843         rd->findFuncs(b->start(),prev_owners);
1844         for(std::set<Function*>::iterator oit = prev_owners.begin();
1845             oit != prev_owners.end(); ++oit)
1846         {
1847             Function * po = *oit;
1848             if (po->_cache_valid) {
1849                 po->_cache_valid = false;
1850                 parsing_printf("[%s:%d] split of [%lx,%lx) invalidates cache of "
1851                                        "func at %lx\n",
1852                                FILE__,__LINE__,b->start(),b->end(),po->addr());
1853             }
1854             if (isRetBlock) {
1855                 po->_retBL.clear(); //could remove b from the vector instead of clearing it, not sure what's cheaper
1856             }
1857         }
1858         // KEVINTODO: study performance impact of this callback
1859         _pcb.splitBlock(b,ret);
1860     }
1861
1862     return ret;
1863 }
1864
1865
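// Retarget a pending call edge to the entry block of the callee at target,
// if a function at that address is already known to the parser.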
1866 pair<Function *,Edge*>
1867 Parser::bind_call(ParseFrame & frame, Address target, Block * cur, Edge * exist)
1868 {
1869     Function * tfunc = NULL;
1870     Block * tblock = NULL;
1871
1872     // look it up
1873     tfunc = _parse_data->findFunc(frame.codereg,target);
1874     if(!tfunc) {
1875         parsing_printf("[%s:%d] can't bind call to %lx\n",
1876                        FILE__,__LINE__,target);
1877         return pair<Function*,Edge*>((Function *) NULL,exist);
1878     }
1879     assert(tfunc->entry());
1880     
1881     relink(exist,cur,tfunc->entry());
1882     return pair<Function*,Edge*>(tfunc,exist);
1883 }
1884
1885 Function *
1886 Parser::findFuncByEntry(CodeRegion *r, Address entry)
1887 {
1888     if(_parse_state < PARTIAL) {
1889         parsing_printf("[%s:%d] Parser::findFuncByEntry([%lx,%lx),%lx) "
1890                                "forced parsing\n",
1891                        FILE__,__LINE__,r->low(),r->high(),entry);
1892         parse();
1893     }
1894     return _parse_data->findFunc(r,entry);
1895 }
1896
1897 int
1898 Parser::findFuncs(CodeRegion *r, Address addr, set<Function *> & funcs)
1899 {
1900     if(_parse_state < COMPLETE) {
1901         parsing_printf("[%s:%d] Parser::findFuncs([%lx,%lx),%lx,...) "
1902                                "forced parsing\n",
1903                        FILE__,__LINE__,r->low(),r->high(),addr);
1904         parse();
1905     }
1906     if(_parse_state < FINALIZED) {
1907         parsing_printf("[%s:%d] Parser::findFuncs([%lx,%lx),%lx,...) "
1908                                "forced finalization\n",
1909                        FILE__,__LINE__,r->low(),r->high(),addr);
1910         finalize();
1911     }
1912     return _parse_data->findFuncs(r,addr,funcs);
1913 }
1914
1915 int
1916 Parser::findFuncs(CodeRegion *r, Address start, Address end, set<Function *> & funcs)
1917 {
1918     if(_parse_state < COMPLETE) {
1919         parsing_printf("[%s:%d] Parser::findFuncs([%lx,%lx),%lx,%lx) "
1920                                "forced parsing\n",
1921                        FILE__,__LINE__,r->low(),r->high(),start,end);
1922         parse();
1923     }
1924     if(_parse_state < FINALIZED) {
1925         parsing_printf("[%s:%d] Parser::findFuncs([%lx,%lx),%lx,%lx) "
1926                                "forced finalization\n",
1927                        FILE__,__LINE__,r->low(),r->high(),start,end);
1928         finalize();
1929     }
1930     return _parse_data->findFuncs(r,start,end,funcs);
1931 }
1932
1933 Block *
1934 Parser::findBlockByEntry(CodeRegion *r, Address entry)
1935 {
1936     if(_parse_state < PARTIAL) {
1937         parsing_printf("[%s:%d] Parser::findBlockByEntry([%lx,%lx),%lx) "
1938                                "forced parsing\n",
1939                        FILE__,__LINE__,r->low(),r->high(),entry);
1940         parse();
1941     }
1942     return _parse_data->findBlock(r,entry);
1943 }
1944
1945 Block *
1946 Parser::findNextBlock(CodeRegion *r, Address addr)
1947 {
1948     if(_parse_state < PARTIAL) {
1949         parsing_printf("[%s:%d] Parser::findNextBlock([%lx,%lx),%lx) "
1950                                "forced parsing\n",
1951                        FILE__,__LINE__,r->low(),r->high(),addr);
1952         parse();
1953     }
1954     return _parse_data->findRegion(r)->get_next_block(addr).second;
1955 }
1956
1957 int
1958 Parser::findBlocks(CodeRegion *r, Address addr, set<Block *> & blocks)
1959 {
1960     if(_parse_state < COMPLETE) {
1961         parsing_printf("[%s:%d] Parser::findBlocks([%lx,%lx),%lx,...) "
1962                                "forced parsing\n",
1963                        FILE__,__LINE__,r->low(),r->high(),addr);
1964         parse();
1965     }
1966     return _parse_data->findBlocks(r,addr,blocks);
1967 }
1968
1969 // find blocks without parsing.
1970 int Parser::findCurrentBlocks(CodeRegion* cr, Address addr,
1971                               std::set<Block*>& blocks) {
1972     return _parse_data->findBlocks(cr, addr, blocks);
1973 }
1974
1975 int Parser::findCurrentFuncs(CodeRegion * cr, Address addr, std::set<Function*> &funcs) {
1976     return _parse_data->findFuncs(cr, addr, funcs);
1977 }
1978
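// Create an edge of type et from src to dst, register it with both blocks'
// edge lists, and notify parse callbacks; `sink` marks edges whose true
// target is not (or cannot be) resolved.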
1979 Edge*
1980 Parser::link(Block *src, Block *dst, EdgeTypeEnum et, bool sink)
1981 {
1982     assert(et != NOEDGE);
1983     Edge * e = factory()._mkedge(src,dst,et);
1984     e->_type._sink = sink;
1985     src->addTarget(e);
1986     dst->addSource(e);
1987     _pcb.addEdge(src, e, ParseCallback::target);
1988     _pcb.addEdge(dst, e, ParseCallback::source);
1989     return e;
1990 }
1991
1992 /* 
1993  * During parsing, all edges are temporarily linked to the _tempsink
1994  * block. Adding and then removing edges to and from this block is
1995  * wasteful, especially given that removal is an O(N) operation with
1996  * vector storage. This call thus does not record edges into the sink.
1997  * These edges are guaranteed to have been relinked when parsing is
1998  * in state COMPLETE.
1999  *
2000  * NB This introduces an inconsistency into the CFG invariant that all
2001  *    targets of an edge have that edge in their source list if the
2002  *    data structures are queried during parsing. Extenders of 
2003  *    parsing callbacks are the only ones who can see this state;
2004  *    they have been warned and should know better.
2005  */
2006 Edge*
2007 Parser::link_tempsink(Block *src, EdgeTypeEnum et)
2008 {
2009     Block* tmpsink = parse_data()->findBlock(src->region(), std::numeric_limits<Address>::max());
2010     Edge * e = factory()._mkedge(src, tmpsink,et);
2011     e->_type._sink = true;
2012     src->_trglist.push_back(e);
2013     return e;
2014 }
2015
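// Point edge e at a (possibly new) source and target, fixing up both blocks'
// edge lists and issuing the matching callback notifications; this is how
// temporary sink edges are resolved once their real target is known.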
2016 void
2017 Parser::relink(Edge * e, Block *src, Block *dst)
2018 {
2019     unsigned long srcOut = 0, dstIn = 0, oldDstIn = 0;
2020     Block* oldDst = NULL;
2021     if(e->trg() && e->trg_addr() != std::numeric_limits<Address>::max()) {
2022         oldDst = e->trg();
2023     }
2024     bool addSrcAndDest = true;
2025     if(src != e->src()) {
2026         e->src()->removeTarget(e);
2027         _pcb.removeEdge(e->src(), e, ParseCallback::target);
2028         e->_source = src;
2029         src->addTarget(e);
2030         _pcb.addEdge(src, e, ParseCallback::target);
2031     }
2032     if(dst != e->trg()) {
2033         if(oldDst) // old edge was not sink
2034         {
2035             oldDst->removeSource(e);
2036             _pcb.removeEdge(e->trg(), e, ParseCallback::source);
2037             addSrcAndDest = false;
2038         }
2039         e->_target_off = dst->start();
2040         dst->addSource(e);
2041         _pcb.addEdge(dst, e, ParseCallback::source);
2042     }
2043     if (addSrcAndDest) {
2044         // We're re-linking a sinkEdge to be a non-sink edge; since
2045         // we don't inform PatchAPI of temporary sinkEdges, we have
2046         // to add both the source AND target edges
2047         _pcb.addEdge(src, e, ParseCallback::target);
2048     }
2049     if(parse_data()->findBlock(dst->region(), dst->start()) != dst) {
2050         assert(!"another block already exists at this address!");
2051     }
2052     e->_type._sink = (dst->start() == std::numeric_limits<Address>::max());
2053 }
2054
2055 ParseFrame::Status
2056 Parser::frame_status(CodeRegion * cr, Address addr)
2057 {
2058     // XXX parsing frames may have been cleaned up, but status
2059     //     is always cached
2060     return _parse_data->frameStatus(cr,addr);
2061 }
2062
2063 void
2064 Parser::remove_func(Function *func)
2065 {
2066     if (sorted_funcs.end() != sorted_funcs.find(func)) {
2067         sorted_funcs.erase(func);
2068     }
2069     if (HINT == func->src()) {
2070         for (unsigned fidx=0; fidx < hint_funcs.size(); fidx++) {
2071             if (hint_funcs[fidx] == func) {
2072                 hint_funcs[fidx] = hint_funcs[hint_funcs.size()-1];
2073                 hint_funcs.pop_back();
2074                 break;
2075             }
2076         }
2077     }
2078     else {
2079         for (unsigned fidx=0; fidx < discover_funcs.size(); fidx++) {
2080             if (discover_funcs[fidx] == func) {
2081                 discover_funcs[fidx] = discover_funcs[discover_funcs.size()-1];
2082                 discover_funcs.pop_back();
2083                 break;
2084             }
2085         }
2086     }
2087
2088     _parse_data->remove_func(func);
2089 }
2090
2091 void
2092 Parser::remove_block(Dyninst::ParseAPI::Block *block)
2093 {
2094     boost::lock_guard<ParseData> g(*_parse_data, boost::adopt_lock);
2095     _parse_data->remove_block(block);
2096 }
2097
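// Re-register func under a new entry address and code region by moving its
// entry in the per-region funcsByAddr map.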
2098 void Parser::move_func(Function *func, Address new_entry, CodeRegion *new_reg)
2099 {
2100     region_data *reg_data = _parse_data->findRegion(func->region());
2101
2102     race_detector_fake_lock_acquire(race_detector_fake_lock(reg_data->funcsByAddr));
2103     {
2104       tbb::concurrent_hash_map<Address, Function*>::accessor a;
2105       if(reg_data->funcsByAddr.find(a, func->addr()))
2106         {
2107           reg_data->funcsByAddr.erase(a);
2108         }
2109       reg_data = _parse_data->findRegion(new_reg);
2110       reg_data->funcsByAddr.insert(a, make_pair(new_entry, func));
2111     }
2112     race_detector_fake_lock_release(race_detector_fake_lock(reg_data->funcsByAddr));
2113 }
2114
2115 void Parser::invalidateContainingFuncs(Function *owner, Block *b)
2116 {
2117     CodeRegion * cr;
2118     if(owner->region()->contains(b->start()))
2119         cr = owner->region();
2120     else
2121         cr = _parse_data->reglookup(owner->region(),b->start());
2122     region_data * rd = _parse_data->findRegion(cr);
2123
2124
2125     // Any functions holding b that have already been finalized
2126     // need to have their caches invalidated so that they will
2127     // find out that they have this new 'ret' block
2128     std::set<Function*> prev_owners;
2129     rd->findFuncs(b->start(),prev_owners);
2130     for(std::set<Function*>::iterator oit = prev_owners.begin();
2131         oit != prev_owners.end(); ++oit)
2132     {
2133         Function * po = *oit;
2134         po->_cache_valid = false;
2135         parsing_printf("[%s:%d] split of [%lx,%lx) invalidates cache of "
2136                                "func at %lx\n",
2137                        FILE__,__LINE__,b->start(),b->end(),po->addr());
2138     }
2139 }
2140
2141 /* Add ParseFrames waiting on func back to the work queue */
2142 void Parser::resumeFrames(Function * func, LockFreeQueue<ParseFrame *> & work)
2143 {
2144     // If we do not know the function's return status, don't put its waiters back on the worklist
2145     if (func->retstatus() == UNSET) {
2146         parsing_printf("[%s] %s return status unknown, cannot resume waiters\n",
2147                        __FILE__,
2148                        func->name().c_str());
2149         return;
2150     }
2151     boost::lock_guard<DelayedFrames> g(delayed_frames);
2152
2153     // When a function's return status is set, move all waiting frames back onto the worklist
2154     map<Function *, set<ParseFrame *> >::iterator iter = delayed_frames.frames.find(func);
2155     if (iter == delayed_frames.frames.end()) {
2156         // There were no frames waiting, ignore
2157         parsing_printf("[%s] %s return status %d, no waiters\n",
2158                        __FILE__,
2159                        func->name().c_str(),
2160                        func->retstatus());
2161         return;
2162     } else {
2163         parsing_printf("[%s] %s return status %d, undelaying waiting functions\n",
2164                        __FILE__,
2165                        func->name().c_str(),
2166                        func->retstatus());
2167         // Add each waiting frame back to the worklist
2168         set<ParseFrame *> vec = iter->second;
2169         for (set<ParseFrame *>::iterator fIter = vec.begin();
2170              fIter != vec.end();
2171              ++fIter) {
2172             work.insert(*fIter);
2173         }
2174         // remove func from delayedFrames map
2175         delayed_frames.frames.erase(func);
2176     }
2177 }
2178
2179 bool Parser::getSyscallNumber(Function * /*func*/,
2180                               Block * block,
2181                               Address /*addr*/,
2182                               Architecture arch,
2183                               long int & val)
2184 {
2185     val = -1;
2186
2187     // In the common case, the setup of system call number
2188     // is a mov in the previous instruction. We don't currently look elsewhere.
2189     // In the future, could use slicing and symeval to find the value of
2190     // this register at the system call (as unstrip does).
2191     Block::Insns blockInsns;
2192     block->getInsns(blockInsns);
    // Guard against degenerate blocks: with fewer than two instructions there
    // is no previous instruction to examine.
    if (blockInsns.size() < 2) {
        return false;
    }
2193     auto prevPair = ++(blockInsns.rbegin());
2194     InstructionAPI::Instruction prevInsn = prevPair->second;
2195     if (prevInsn.getOperation().getID() != e_mov) {
2196         return false;
2197     }
2198
2199     MachRegister syscallNumberReg = MachRegister::getSyscallNumberReg(arch);
2200     if (syscallNumberReg == InvalidReg) {
2201         return false;
2202     }
2203     InstructionAPI::RegisterAST* regAST = new InstructionAPI::RegisterAST(syscallNumberReg);
2204     InstructionAPI::RegisterAST::Ptr regASTPtr = InstructionAPI::RegisterAST::Ptr(regAST);
2205
2206     std::vector<InstructionAPI::Operand> operands;
2207     prevInsn.getOperands(operands);
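    // Pick the operand of the mov that does not write the syscall-number
    // register (i.e., its source); if it evaluates to a constant, use that
    // value as the system call number.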
2208     for (unsigned i = 0; i < operands.size(); i++) {
2209         if (!operands[i].isWritten(regASTPtr)) {
2210             InstructionAPI::Expression::Ptr value = operands[i].getValue();
2211             InstructionAPI::Result res = value->eval();
2212             if (res.defined) {
2213                 val = res.convert<Offset>();
2214             }
2215         }
2216     }
2217
2218     return (val != -1);
2219 }
2220