// Merge John's changes and update test suite.
// [dyninst.git] / parseAPI / src / Parser.C
1 /*
2  * See the dyninst/COPYRIGHT file for copyright information.
3  * 
4  * We provide the Paradyn Tools (below described as "Paradyn")
5  * on an AS IS basis, and do not warrant its validity or performance.
6  * We reserve the right to update, modify, or discontinue this
7  * software at any time.  We shall have no obligation to supply such
8  * updates or modifications or any other form of support to you.
9  * 
10  * By your use of Paradyn, you understand and agree that we (or any
11  * other person or entity with proprietary rights in Paradyn) are
12  * under no obligation to provide either maintenance services,
13  * update services, notices of latent defects, or correction of
14  * defects for Paradyn.
15  * 
16  * This library is free software; you can redistribute it and/or
17  * modify it under the terms of the GNU Lesser General Public
18  * License as published by the Free Software Foundation; either
19  * version 2.1 of the License, or (at your option) any later version.
20  * 
21  * This library is distributed in the hope that it will be useful,
22  * but WITHOUT ANY WARRANTY; without even the implied warranty of
23  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
24  * Lesser General Public License for more details.
25  * 
26  * You should have received a copy of the GNU Lesser General Public
27  * License along with this library; if not, write to the Free Software
28  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
29  */
30
31 #include <omp.h>
32
33 #include <vector>
34 #include <limits>
35
36 // For Mutex
37 #define PROCCONTROL_EXPORTS
38
39 #include "dyntypes.h"
40
41 #include "CodeObject.h"
42 #include "CFGFactory.h"
43 #include "ParseCallback.h"
44 #include "Parser.h"
45 #include "Parser.h"
46 #include "CFG.h"
47 #include "util.h"
48 #include "debug_parse.h"
49
50 #include <boost/bind/bind.hpp>
51
52
53 #include <boost/timer/timer.hpp>
54 #include <fstream>
55
56 #ifdef ENABLE_RACE_DETECTION
57 #define USE_CILK 1
58 #else
59 #define USE_OPENMP 1
60 #endif
61
62 #if USE_CILK
63 #include <cilk/cilk.h>
64 #include <cilk/cilk_api.h>
65 #endif
66
67 #include "race-detector-annotations.h"
68
69 using namespace std;
70 using namespace Dyninst;
71 using namespace Dyninst::ParseAPI;
72 using namespace Dyninst::InstructionAPI;
73
74 typedef std::pair< Address, EdgeTypeEnum > edge_pair_t;
75 typedef vector< edge_pair_t > Edges_t;
76
77 #include "common/src/dthread.h"
78
79 namespace {
80     struct less_cr {
81         bool operator()(CodeRegion * x, CodeRegion * y)
82         {
83             return x->offset() < y->offset();
84         }
85     };
86 }
87
88 Parser::Parser(CodeObject & obj, CFGFactory & fact, ParseCallbackManager & pcb) :
89         _obj(obj),
90         _cfgfact(fact),
91         _pcb(pcb),
92         _parse_data(NULL),
93         _parse_state(UNPARSED),
94         _in_parse(false),
95         _in_finalize(false)
96 {
97     delayed_frames.size = 0;
98     // cache plt entries for fast lookup
99     const map<Address, string> & lm = obj.cs()->linkage();
100     map<Address, string>::const_iterator lit = lm.begin();
101     for( ; lit != lm.end(); ++lit) {
102         parsing_printf("Cached PLT entry %s (%lx)\n",lit->second.c_str(),lit->first);
103         plt_entries[lit->first] = lit->second;
104     }
105
106     if(obj.cs()->regions().empty()) {
107         parsing_printf("[%s:%d] CodeSource provides no CodeRegions"
108                                " -- unparesable\n",
109                        FILE__,__LINE__);
110         _parse_state = UNPARSEABLE;
111         return;
112     }
113
114     // check whether regions overlap
115     vector<CodeRegion *> const& regs = obj.cs()->regions();
116     vector<CodeRegion *> copy(regs.begin(),regs.end());
117     sort(copy.begin(),copy.end(),less_cr());
118
119     // allocate a sink block -- region is arbitrary
120
121     bool overlap = false;
122     CodeRegion * prev = copy[0], *cur = NULL;
123     for(unsigned i=1;i<copy.size();++i) {
124         cur = copy[i];
125         if(cur->offset() < prev->offset() + prev->length()) {
126             parsing_printf("Overlapping code regions [%lx,%lx) and [%lx,%lx)\n",
127                            prev->offset(),prev->offset()+prev->length(),
128                            cur->offset(),cur->offset()+cur->length());
129             overlap = true;
130             break;
131         }
132     }
133
134     if(overlap)
135         _parse_data = new OverlappingParseData(this,copy);
136     else
137         _parse_data = new StandardParseData(this);
138 }
139
// Destructor: frames that terminated in an error state may still hold
// work bundles, so cleanup() must run unconditionally here.
ParseFrame::~ParseFrame()
{
    cleanup(); // error'd frames still need cleanup
}
144
145 Parser::~Parser()
146 {
147     if(_parse_data)
148         delete _parse_data;
149
150     for(auto fit = frames.begin() ; fit != frames.end(); ++fit)
151         delete *fit;
152
153     frames.clear();
154 }
155
156 void
157 Parser::add_hint(Function * f)
158 {
159     if(!_parse_data->findFunc(f->region(),f->addr()))
160         record_func(f);
161 }
162
// Render a pair-like value as "first, second" (used for the optional
// parse-frame timing histogram dump).
template <typename T>
std::string pair_to_string(T pr)
{
    std::ostringstream out;
    out << pr.first << ", " << pr.second;
    return out.str();
}
170
171
// Top-level parse entry point: runs a full recursive parse from all
// hints and finalizes the results.  Idempotent -- returns immediately
// if parsing already completed or the CodeSource was unparseable.
void
Parser::parse()
{
    parsing_printf("[%s:%d] parse() called on Parser %p with state %d\n",
                   FILE__,__LINE__,this, _parse_state);

    // For modification: once we've full-parsed once, don't do it again
    if (_parse_state >= COMPLETE) return;

    if(_parse_state == UNPARSEABLE)
        return;

    // Serialize whole-object parsing; _in_parse is a debugging flag,
    // not a synchronization mechanism.
    ScopeLock<Mutex<true> > L(parse_mutex);
//    assert(!_in_parse);
    _in_parse = true;

    parse_vanilla();
    finalize();
    // anything else by default...?

    if(_parse_state < COMPLETE)
        _parse_state = COMPLETE;

    _in_parse = false;
    parsing_printf("[%s:%d] parsing complete for Parser %p with state %d\n", FILE__, __LINE__, this, _parse_state);
#ifdef ADD_PARSE_FRAME_TIMERS
    // Dump the per-frame timing histogram collected in ProcessOneFrame
    // as "usecs,count" CSV rows.
    std::ofstream stat_log("functions.csv");
    stat_log << "Results for " << time_histogram.size() << " buckets\n";
    stat_log << "usecs,count\n";
    std::transform(time_histogram.begin(), time_histogram.end(),
                   std::ostream_iterator<std::string >(stat_log, "\n"),
                   pair_to_string<std::pair<unsigned int, unsigned int> >);
#endif
}
206
// Parse starting from a specific target address within a known region.
// Creates (or reuses) the Function and ParseFrame for the target and
// drives parse_frames over it.  `recursive` controls whether callees
// are parsed as well; `src` records how the function was discovered.
void
Parser::parse_at(
        CodeRegion * region,
        Address target,
        bool recursive,
        FuncSource src)
{
    Function *f;
    ParseFrame *pf;
    LockFreeQueue<ParseFrame *> work;

    parsing_printf("[%s:%d] entered parse_at([%lx,%lx),%lx)\n",
                   FILE__,__LINE__,region->low(),region->high(),target);

    if(!region->contains(target)) {
        parsing_printf("\tbad address, bailing\n");
        return;
    }

    if(_parse_state < PARTIAL)
        _parse_state = PARTIAL;

    // Look up or create the function object at the target address.
    f = _parse_data->get_func(region,target,src);
    if(!f) {
        parsing_printf("   could not create function at %lx\n",target);
        return;
    }

    // Skip work if this function already has a frame in progress or done.
    ParseFrame::Status exist = _parse_data->frameStatus(region,target);
    if(exist != ParseFrame::BAD_LOOKUP && exist != ParseFrame::UNPARSED) {
        parsing_printf("   function at %lx already parsed, status %d\n",
                       target, exist);
        return;
    }

    // Create and register a frame if one doesn't already exist.
    if(!(pf = _parse_data->findFrame(region,target))) {
        pf = new ParseFrame(f,_parse_data);
        init_frame(*pf);
        frames.insert(pf);
        _parse_data->record_frame(pf);
    }

    work.insert(pf);
    parse_frames(work,recursive);

    // downgrade state if necessary
    if(_parse_state > COMPLETE)
        _parse_state = COMPLETE;

}
257
258 void
259 Parser::parse_at(Address target, bool recursive, FuncSource src)
260 {
261     CodeRegion * region = NULL;
262
263     parsing_printf("[%s:%d] entered parse_at(%lx)\n",FILE__,__LINE__,target);
264
265     if(_parse_state == UNPARSEABLE)
266         return;
267
268     StandardParseData * spd = dynamic_cast<StandardParseData *>(_parse_data);
269     if(!spd) {
270         parsing_printf("   parse_at is invalid on overlapping regions\n");
271         return;
272     }
273
274     region = spd->reglookup(region,target); // input region ignored for SPD
275     if(!region) {
276         parsing_printf("   failed region lookup at %lx\n",target);
277         return;
278     }
279
280     parse_at(region,target,recursive,src);
281 }
282
// Seed the work queue with a ParseFrame for every hinted function that
// has not already been parsed, then run parse_frames recursively.
void
Parser::parse_vanilla()
{
    LockFreeQueue<ParseFrame *> work;

    parsing_printf("[%s:%d] entered parse_vanilla()\n",FILE__,__LINE__);
    parsing_printf("\t%d function hints\n",hint_funcs.size());

    if(_parse_state < PARTIAL)
        _parse_state = PARTIAL;
    else
        parsing_printf("\tparse state is %d, some parsing already done\n",
                       _parse_state);

    /* Initialize parse frames from hints */

    // Note: there is no fundamental obstacle to parallelizing this loop. However,
    // race conditions need to be resolved in supporting layers first.
    for (unsigned int i = 0; i < hint_funcs.size(); i++) {
        Function * hf = hint_funcs[i];
        ParseFrame::Status test = frame_status(hf->region(),hf->addr());
        // Only BAD_LOOKUP (no frame yet) and UNPARSED frames need work.
        if(test != ParseFrame::BAD_LOOKUP &&
           test != ParseFrame::UNPARSED)
        {
            parsing_printf("\tskipping repeat parse of %lx [%s]\n",
                           hf->addr(),hf->name().c_str());
            continue;
        }

        ParseFrame *pf = new ParseFrame(hf,_parse_data);
        init_frame(*pf);
        frames.insert(pf);
        work.insert(pf);
        _parse_data->record_frame(pf);
    }

    parse_frames(work,true);
}
321
// Parse a batch of externally supplied work elements (edges to follow),
// e.g. from instrumentation-time CFG modification.  Groups the elements
// into per-source-block ParseFrames, completes call-fallthrough bundles
// with a matching call element, then runs parse_frames and finalizes.
void
Parser::parse_edges( vector< ParseWorkElem * > & work_elems )
{
    if(_parse_state == UNPARSEABLE)
        return;

    // build up set of needed parse frames and load them with work elements
    set<ParseFrame*> frameset; // for dup checking
    LockFreeQueue<ParseFrame*> frames;

    for (unsigned idx=0; idx < work_elems.size(); idx++) {

        ParseWorkElem *elem = work_elems[idx];
        Block *src = elem->edge()->src();

        if (elem->order() == ParseWorkElem::call_fallthrough)
        {
            // A call-fallthrough element needs its sibling CALL edge so
            // the work bundle is complete; search the source block's
            // out-edges, preferring a non-sink CALL if one exists.
            Edge *callEdge = NULL;
            boost::lock_guard<Block> g(*src);
            Block::edgelist trgs = src->targets();
            for (Block::edgelist::iterator eit = trgs.begin();
                 eit != trgs.end();
                 eit++)
            {
                if ((*eit)->type() == CALL) {
                    callEdge = *eit;
                    if (!(*eit)->sinkEdge()) // if it's a sink edge, look for nonsink CALL
                        break;
                }
            }
            // create a call work elem so that the bundle is complete
            // and set the target function's return status and 
            // tamper to RETURN and TAMPER_NONE, respectively
            //assert(callEdge);

            // When we have a direct call to unallocated memory or garbage
            // we there is no call edge when the call is first executed. I'm
            // not sure why not, so I'm attempting to continue past here without
            // fixing up the called function...
            // Also, in the case I saw, the callee was _not_ returning. Not directly. It 
            // led into a big case with a longjmp() equivalent. 

            if (callEdge)
            {
                bool isResolvable = false;
                Address callTarget = 0;
                if ( ! callEdge->sinkEdge() )
                {
                    isResolvable = true;
                    callTarget = callEdge->trg()->start();
                    // the call target may be in another Code Object
                    Function *callee = callEdge->trg()->obj()->findFuncByEntry(
                            callEdge->trg()->region(), callTarget);
                    assert(callee);
                    callee->set_retstatus(RETURN);
                    callee->_tamper = TAMPER_NONE;
                }
                elem->bundle()->add(new ParseWorkElem
                                            ( elem->bundle(),
                                              callEdge,
                                              callTarget,
                                              isResolvable,
                                              false ));
            }
        }
        // Find (or create) the frame that owns the source block, keyed
        // by the block's last instruction address.
        ParseFrame *frame = _parse_data->findFrame
                ( src->region(),
                  src->lastInsnAddr() );
        bool isNewFrame = false;
        if (!frame) {
            vector<Function*> funcs;
            src->getFuncs(funcs);
            frame = new ParseFrame(*funcs.begin(),_parse_data);
            for (unsigned fix=1; fix < funcs.size(); fix++) {
                // if the block is shared, all of its funcs need
                // to add the new edge
                funcs[fix]->_cache_valid = false;
            }
            isNewFrame = true;
        }

        // push before frame init so no seed is added
        if (elem->bundle()) {
            frame->work_bundles.push_back(elem->bundle());
        }
        frame->pushWork(elem);
        if (isNewFrame) {
            init_frame(*frame);
        }

        // Queue each distinct frame exactly once.
        if (frameset.end() == frameset.find(frame)) {
            frameset.insert(frame);
            frames.insert(frame);
        }
    }
    ScopeLock<Mutex<true> > L(parse_mutex);
    // now parse
    if(_parse_state < PARTIAL)
        _parse_state = PARTIAL;
    _in_parse = true;

    parse_frames( frames, true );

    if(_parse_state > COMPLETE)
        _parse_state = COMPLETE;
    _in_parse = false;

    finalize();

}
432
433
// Parse a single frame (if it is not already being processed by another
// thread) and return the list of follow-on frames produced by
// post-processing, or NULL if the frame was skipped or produced none.
// Exclusive access to a frame is mediated by its busy flag (swap_busy).
LockFreeQueueItem<ParseFrame*> *
Parser::ProcessOneFrame(ParseFrame* pf, bool recursive) {
  LockFreeQueueItem<ParseFrame*> *frame_list = 0;
  if (pf->func && !pf->swap_busy(true)) {
#ifdef ADD_PARSE_FRAME_TIMERS
    boost::timer::cpu_timer t;
    t.start();
#endif
    parse_frame(*pf,recursive);
#ifdef ADD_PARSE_FRAME_TIMERS
    t.stop();
    // Bucket the wall-clock time for this frame into the shared
    // histogram; the fake lock suppresses benign race reports only.
    unsigned int msecs = floor(t.elapsed().wall / 1000000.0);
    race_detector_fake_lock_acquire(race_detector_fake_lock(time_histogram));
    {
      tbb::concurrent_hash_map<unsigned int, unsigned int>::accessor a;
      time_histogram.insert(a, msecs);
      ++(a->second);
    }
    race_detector_fake_lock_release(race_detector_fake_lock(time_histogram));
#endif
    frame_list = postProcessFrame(pf, recursive);

    // exclusive access to each ParseFrame is mediated by marking a frame busy.
    // we clear evidence of our access to the ParseFrame here because a concurrent
    // thread may try to touch it because of duplicates in the work list. that
    // won't actually be concurrent because of swap_busy. we suppress the race
    // report by scrubbing information about our access.
    race_detector_forget_access_history(pf, sizeof(*pf));

    pf->swap_busy(false);
  }
  return frame_list;
}
467
// Act on the status a frame reached after parse_frame():
//  - CALL_BLOCKED:   re-queue this frame and queue (or create) a frame
//                    for the call target
//  - PARSED:         run defensive-mode tamper handling, resume waiters,
//                    and release the frame's working storage
//  - FRAME_ERROR:    log and drop
//  - FRAME_DELAYED:  register this frame as a waiter on each function
//                    whose return status it needs
//  - PROGRESS:       another thread owns it; nothing to do
// Returns the (possibly empty) list of frames to process next.
LockFreeQueueItem<ParseFrame *> *Parser::postProcessFrame(ParseFrame *pf, bool recursive) {
    LockFreeQueue<ParseFrame*> work;
    boost::lock_guard<ParseFrame> g(*pf);
    switch(pf->status()) {
        case ParseFrame::CALL_BLOCKED: {
            parsing_printf("[%s] frame %lx blocked at %lx\n",
                           FILE__,pf->func->addr(),pf->curAddr);
            {

                assert(pf->call_target);

                parsing_printf("    call target %lx\n",pf->call_target->addr());
                work.insert(pf);

                CodeRegion * cr = pf->call_target->region();
                Address targ = pf->call_target->addr();
                ParseFrame * tf = _parse_data->findFrame(cr,targ);
                if(!tf)
                {
                    // sanity
                    if(_parse_data->frameStatus(cr,targ) == ParseFrame::PARSED)
                        assert(0);

                    tf = new ParseFrame(pf->call_target,_parse_data);
                    init_frame(*tf);
                    frames.insert(tf);
                    _parse_data->record_frame(tf);
                }
                if(likely(recursive))
                    work.insert(tf);
                else {
                    assert(0);
                    // XXX should never get here
                    //parsing_printf("    recursive parsing disabled\n");
                }

            }
            break;
        }
        case ParseFrame::PARSED:{
            parsing_printf("[%s] frame %lx complete, return status: %d\n",
                           FILE__,pf->func->addr(),pf->func->retstatus());

            if (unlikely(_obj.defensiveMode() &&
                         TAMPER_NONE != pf->func->tampersStack() &&
                         TAMPER_NONZERO != pf->func->tampersStack() ))
            {   // may adjust CALL_FT targets, add a frame to the worklist,
                // or trigger parsing in a separate target CodeObject
                tamper_post_processing(work,pf);
            }

            /* add waiting frames back onto the worklist */
            resumeFrames(pf->func, work);

            pf->cleanup();
            break;
        }
        case ParseFrame::FRAME_ERROR:
            parsing_printf("[%s] frame %lx error at %lx\n",
                           FILE__,pf->func->addr(),pf->curAddr);
            break;
        case ParseFrame::FRAME_DELAYED:
        {
            // Record this frame in the global delayed-frames map, keyed
            // by each callee whose return status is still unknown.
            boost::lock_guard<DelayedFrames> g(delayed_frames);
            parsing_printf("[%s] frame %lx delayed at %lx\n",
                                                                                  FILE__,
                                                                                  pf->func->addr(),
                                                                                  pf->curAddr);

            if (pf->delayedWork.size()) {
                // Add frame to global delayed list

                for (auto iter = pf->delayedWork.begin();
                     iter != pf->delayedWork.end();
                     ++iter) {

                    Function * ct = iter->second;
                    parsing_printf("[%s] waiting on %s\n",
                                   __FILE__,
                                   ct->name().c_str());
                    {

                        auto fIter = delayed_frames.frames.find(ct);
                        if (fIter == delayed_frames.frames.end()) {
                            set<ParseFrame *> waiters;
                            waiters.insert(pf);
                            delayed_frames.frames[ct] = waiters;
                        } else {
                            delayed_frames.frames[ct].insert(pf);
                        }
                    }
                }
            } else {
                // We shouldn't get here
                assert(0 && "Delayed frame with no delayed work");
            }

            /* if the return status of this function has been updated, add
             * waiting frames back onto the work list */
            resumeFrames(pf->func, work);
            break;
        }
        case ParseFrame::PROGRESS:
            // another thread is working on this already; possibly something else unblocked.
            break;

        default:
            assert(0 && "invalid parse frame status");
    }
    pf->inProcess = false;
    return work.steal();
}
580
581
582 static void
583 InsertFrames
584 (
585  LockFreeQueueItem<ParseFrame *> *frames,
586  LockFreeQueue<ParseFrame *> *q
587 )
588 {
589   if (frames) {
590     LockFreeQueue<ParseFrame *> myq(frames);
591     q->splice(myq);
592   }
593 }
594
595
// Spawn a parallel task (OpenMP task or Cilk spawn, per build config)
// for each frame on the given private list.  Each queue item is freed
// once its frame has been handed off to a task.
void
Parser::LaunchWork
(
 LockFreeQueueItem<ParseFrame*> *frame_list,
 bool recursive
)
{
  LockFreeQueue<ParseFrame *> private_queue(frame_list);
  for(;;) {
    LockFreeQueueItem<ParseFrame *> *first = private_queue.pop();
    if (first == 0) break;
    ParseFrame *frame = first->value();
    delete first;
#if USE_OPENMP
#pragma omp task firstprivate(frame, recursive)
    SpawnProcessFrame(frame, recursive);
#else
    cilk_spawn SpawnProcessFrame(frame, recursive);
#endif
  }
}
617
618
619 void
620 Parser::SpawnProcessFrame
621 (
622  ParseFrame *pf,
623  bool recursive
624 )
625 {
626   LockFreeQueueItem<ParseFrame*> *new_frames = ProcessOneFrame(pf, recursive);
627   LaunchWork(new_frames, recursive);
628 }
629
630 void print_work_queue(LockFreeQueue<ParseFrame *> *work_queue)
631 {
632   LockFreeQueueItem<ParseFrame *> *current = work_queue->peek();
633
634   std::cout << "Work Queue" << std::endl;
635   while (current) {
636     std::cout << "  parse frame " << std::hex << current->value()
637               << std::dec << std::endl;
638     current = current->next();
639   }
640 }
641
642
// Drain the work queue.  Under OpenMP a parallel region is opened and
// the master thread seeds the task pool via LaunchWork; the implicit
// barrier at the end of the region waits for all spawned tasks.
// Without OpenMP the queue is processed directly (Cilk spawns inside
// LaunchWork, or serially).
void
Parser::ProcessFrames
(
 LockFreeQueue<ParseFrame *> *work_queue,
 bool recursive
)
{
#if USE_OPENMP
#pragma omp parallel shared(work_queue)
  {
#pragma omp master
    LaunchWork(work_queue->steal(), recursive);
  }
#else
  LaunchWork(work_queue->steal(), recursive);
#endif
}
660
661
662 void
663 Parser::parse_frames(LockFreeQueue<ParseFrame *> &work, bool recursive)
664 {
665     ProcessFrames(&work, recursive);
666
667     bool done = false, cycle = false;
668     {
669         boost::lock_guard<DelayedFrames> g(delayed_frames);
670         if(delayed_frames.frames.empty()) {
671             parsing_printf("[%s] Fixed point reached (0 funcs with unknown return status)\n)",
672                            __FILE__);
673             delayed_frames.size = 0;
674             done = true;
675         } else if(delayed_frames.size == delayed_frames.frames.size()) {
676             cycle = true;
677         }
678     }
679
680     // Use fixed-point to ensure we parse frames whose parsing had to be delayed
681     if (!done) {
682         if(cycle) {
683             processCycle(work,recursive);
684         } else {
685             processFixedPoint(work, recursive);
686         }
687     }
688
689     cleanup_frames(work);
690 }
691
692 void Parser::processFixedPoint(LockFreeQueue<ParseFrame *> &work, bool recursive) {// We haven't yet reached a fixedpoint; let's recurse
693     {
694         boost::lock_guard<DelayedFrames> g(delayed_frames);
695
696         parsing_printf("[%s] Fixed point not yet reached (%d funcs with unknown return status)\n",
697                        __FILE__,
698                        delayed_frames.frames.size());
699
700         // Update delayed_frames.size for next iteration
701         delayed_frames.size = delayed_frames.frames.size();
702
703         // Check if we can resume any frames yet
704         vector<Function *> updated;
705         for (auto iter = delayed_frames.frames.begin();
706              iter != delayed_frames.frames.end();
707              ++iter) {
708             if (iter->first->retstatus() != UNSET) {
709                 updated.push_back(iter->first);
710             }
711         }
712
713         if (updated.size()) {
714             for (auto uIter = updated.begin();
715                  uIter != updated.end();
716                  ++uIter) {
717                 resumeFrames((*uIter), work);
718             }
719         }
720     }
721     // Recurse through parse_frames
722     parsing_printf("[%s] Calling parse_frames again... \n", __FILE__);
723
724     parse_frames(work, recursive);
725 }
726
727 void Parser::processCycle(LockFreeQueue<ParseFrame *> &work, bool recursive) {// If we've reached a fixedpoint and have remaining frames, we must
728     // have a cyclic dependency
729     vector<Function *> updated;
730     {
731         boost::lock_guard<DelayedFrames> g(delayed_frames);
732         parsing_printf("[%s] Fixed point reached (%d funcs with unknown return status)\n",
733                        __FILE__,
734                        delayed_frames.frames.size());
735
736         // Mark UNSET functions in cycle as NORETURN
737         // except if we're doing non-recursive parsing.
738         // If we're just parsing one function, we want
739         // to mark everything RETURN instead.
740         for (auto iter = delayed_frames.frames.begin(); iter != delayed_frames.frames.end(); ++iter) {
741             Function * func = iter->first;
742             if (func->retstatus() == UNSET) {
743                 if(recursive)
744                 {
745                     func->set_retstatus(NORETURN);
746                     func->obj()->cs()->incrementCounter(PARSE_NORETURN_HEURISTIC);
747                 }
748                 else
749                 {
750                     func->set_retstatus(RETURN);
751                 }
752                 updated.push_back(func);
753             }
754
755             set<ParseFrame *> vec = iter->second;
756             for (auto vIter = vec.begin(); vIter != vec.end(); ++vIter) {
757                 Function * delayed = (*vIter)->func;
758                 if (delayed->retstatus() == UNSET) {
759                     delayed->set_retstatus(NORETURN);
760                     delayed->obj()->cs()->incrementCounter(PARSE_NORETURN_HEURISTIC);
761                     updated.push_back(func);
762                 }
763             }
764         }
765
766     }
767
768     // We should have updated the return status of one or more frames; recurse
769     if (updated.size()) {
770         for (auto uIter = updated.begin();
771              uIter != updated.end();
772              ++uIter) {
773             resumeFrames((*uIter), work);
774         }
775
776         if (work.peek()) {
777             parsing_printf("[%s] Updated retstatus of delayed frames, trying again...\n", __FILE__);
778             parse_frames(work, recursive);
779         }
780     } else {
781         // We shouldn't get here
782         parsing_printf("[%s] No more work can be done (%d funcs with unknown return status)\n",
783                        __FILE__,
784                        delayed_frames.frames.size());
785         assert(0);
786     }
787 }
788
789 void Parser::cleanup_frames()  {
790   vector <ParseFrame *> pfv;
791   std::copy(frames.begin(), frames.end(), std::back_inserter(pfv));
792 #if USE_OPENMP
793 #pragma omp parallel for schedule(auto)
794   for (unsigned int i = 0; i < pfv.size(); i++) {
795     ParseFrame *pf = pfv[i];
796     if (pf) {
797       _parse_data->remove_frame(pf);
798       delete pf;
799     }
800   }
801   frames.clear();
802 #elif USE_CILK
803   cilk_for(unsigned i=0; i < pfv.size(); ++i) {
804     ParseFrame *pf = pfv[i];
805     if (pf) {
806       _parse_data->remove_frame(pf);
807       delete pf;
808     }
809   }
810 #else
811   for(unsigned i=0; i < pfv.size(); ++i) {
812     ParseFrame *pf = pfv[i];
813     if (pf) {
814       _parse_data->remove_frame(pf);
815       delete pf;
816     }
817   }
818 #endif
819   frames.clear();
820 }
821
822 /* Finalizing all functions for consumption:
823   
824    - Finish delayed parsing
825    - Prepare and record FuncExtents for range-based lookup
826 */
827
828 // Could set cache_valid depending on whether the function is currently
829 // being parsed somewhere. 
830
// Post-parse consistency pass for one block of `owner`:
//  - locate the region containing the block's start address
//  - if no block is recorded at that address, test any overlapping
//    blocks for instruction-boundary consistency (platform specific)
//  - if a recorded block exists but starts before addr, split it
//  - finally announce the block via the callback manager
void Parser::finalize_block(Block* b, Function* owner)
{
    Address addr = b->start();
    Address prev_insn = b->lastInsnAddr();
    set<Block *> overlap;
    CodeRegion *cr;
    if(owner->region()->contains(addr))
        cr = owner->region();
    else
        cr = _parse_data->reglookup(owner->region(),addr);

    Block* exist = _parse_data->findBlock(cr, addr);
    if(NULL == exist)
    {
        _parse_data->findBlocks(cr,addr,overlap);
        if(overlap.size() > 1)
            parsing_printf("[%s] address %lx overlapped by %d blocks\n",
                           FILE__,addr,overlap.size());

        /* Platform specific consistency test:
           generally checking for whether the address is an
           instruction boundary in a block */
        for(set<Block *>::iterator sit=overlap.begin();sit!=overlap.end();++sit)
        {
            Block * ob = *sit;

            // consistent will fill in prev_insn with the address of the
            // instruction preceeding addr if addr is consistent
            if(ob->consistent(addr,prev_insn)) {
                exist = b;
                break;
            } else {
                parsing_printf("[%s] %lx is inconsistent with [%lx,%lx)\n",
                               FILE__,addr,b->start(),b->end());
                _pcb.overlapping_blocks(ob,b);
            }
        }

    }
    if(exist) {
        // A block already covers addr; if it begins earlier, split it
        // at addr and record the newly created tail block.
        if(exist->start() != addr) {
            Block* ret = split_block(owner, b, addr, prev_insn);
            record_block(ret);
        }
    }
    _pcb.addBlock(owner, b);

}
879
void
Parser::finalize(Function *f)
{
    // Build the finalized view of one function: force parsing if needed,
    // then (re)compute its FuncExtent list and publish the extents into
    // the region's funcsByRange map. Serialized by finalize_mutex so only
    // one thread finalizes at a time.
    boost::unique_lock<boost::mutex> finalize_write_lock(finalize_mutex);
    if(f->_cache_valid) {
        // Already finalized and nothing has invalidated the cache.
        return;
    }

    if(!f->_parsed) {
        // Finalization requested before the function was ever parsed;
        // parse it now so there are blocks to build extents from.
        parsing_printf("[%s:%d] Parser::finalize(f[%lx]) "
                               "forced parsing\n",
                       FILE__,__LINE__,f->addr());
        parse_at(f->addr(), true, f->src());
    }

    bool cache_value = true;
    /* this is commented out to prevent a failure in tampersStack, but
           this may be an incorrect approach to fixing the problem.
    if(frame_status(f->region(), f->addr()) < ParseFrame::PARSED) {
        // XXX prevent caching of blocks, extents for functions that
        // are actively being parsed. This prevents callbacks and other
        // functions called from within, e.g. parse_frame from setting
        // the caching flag and preventing later updates to the blocks()
        // vector during finalization.
        cache_value = false;
    }*/

    parsing_printf("[%s] finalizing %s (%lx)\n",
                   FILE__,f->name().c_str(),f->addr());

    region_data * rd = _parse_data->findRegion(f->region());
    assert(rd);

    // finish delayed parsing and sorting
    Function::blocklist blocks = f->blocks_int();

    // is this the first time we've parsed this function?
    // If not, drop the stale extents before rebuilding them below.
    if (unlikely( !f->_extents.empty() )) {
        _parse_data->remove_extents(f->_extents);
        f->_extents.clear();
    }


    if(blocks.empty()) {
        f->_cache_valid = cache_value; // see above
        return;
    }

    // Walk the (sorted) block list, coalescing contiguous blocks into
    // extents; a gap between ext_e and the next block's start closes the
    // current extent and starts a new one.
    auto bit = blocks.begin();
    FuncExtent * ext = NULL;
    Address ext_s = (*bit)->start();
    Address ext_e = ext_s;

    for( ; bit != blocks.end(); ++bit) {
        Block * b = *bit;
        finalize_block(b, f);
        if(b->start() > ext_e) {
            // Gap found: emit the extent accumulated so far.
            ext = new FuncExtent(f,ext_s,ext_e);

            // remove access history for ext before publishing it
            // to concurrent readers to avoid false race reports.
            // ext is written before it is published and only read
            // thereafter.
            race_detector_forget_access_history(ext, sizeof(*ext));

            parsing_printf("%lx extent [%lx,%lx)\n",f->addr(),ext_s,ext_e);
            f->_extents.push_back(ext);
            rd->funcsByRange.insert(ext);
            ext_s = b->start();
        }
        ext_e = b->end();
    }
    // Emit the final (or only) extent covering the last run of blocks.
    ext = new FuncExtent(f,ext_s,ext_e);

    // remove access history for ext before publishing it
    // to concurrent readers to avoid false race reports.
    // ext is written before it is published and only read
    // thereafter.
    race_detector_forget_access_history(ext, sizeof(*ext));

    parsing_printf("%lx extent [%lx,%lx)\n",f->addr(),ext_s,ext_e);
    rd->funcsByRange.insert(ext);
    f->_extents.push_back(ext);

    f->_cache_valid = cache_value; // see comment at function entry

    if (unlikely( f->obj()->defensiveMode())) {
        // add fallthrough edges for calls assumed not to be returning
        // whose fallthrough blocks we parsed anyway (this happens if 
        // the FT block is also a branch target)
        Function::edgelist & edges = f->_call_edge_list;
        for (Function::edgelist::iterator eit = edges.begin();
             eit != edges.end();
             eit++)
        {
            boost::lock_guard<Block> g(*(*eit)->src());
            if (2 > (*eit)->src()->targets().size()) {
                Block *ft = _parse_data->findBlock((*eit)->src()->region(),
                                                   (*eit)->src()->end());
                // Only link if the fallthrough block is part of this
                // function's block map.
                if (ft && HASHDEF(f->_bmap,ft->start())) {
                    link((*eit)->src(),ft,CALL_FT,false);
                }
            }
        }
    }
}
986
987 void
988 Parser::finalize()
989 {
990     if(_parse_state < FINALIZED) {
991         finalize_funcs(hint_funcs);
992         finalize_funcs(discover_funcs);
993         _parse_state = FINALIZED;
994     }
995 }
996
997 void
998 Parser::finalize_funcs(vector<Function *> &funcs)
999 {
1000     vector<Function *>::iterator fit = funcs.begin();
1001     for( ; fit != funcs.end(); ++fit) {
1002         finalize(*fit);
1003     }
1004 }
1005
1006 void
1007 Parser::record_func(Function *f)
1008 {
1009     if(!f) return;
1010
1011     if(f->src() == HINT)
1012         hint_funcs.push_back(f);
1013     else
1014         discover_funcs.push_back(f);
1015
1016     sorted_funcs.insert(f);
1017
1018     _parse_data->record_func(f);
1019 }
1020
void
Parser::init_frame(ParseFrame & frame)
{
    // Prepare a parse frame for its first run: establish the function's
    // entry block and seed work element, then probe the entry point's
    // instructions for stack-frame setup characteristics.
    boost::lock_guard<ParseFrame> g(frame);
    Block * b = NULL;
    Block * split = NULL;

    if ( ! frame.func->_entry )
    {
        // Find or create a block
        b = block_at(frame.func, frame.func->addr(),split);
        if(b) {
            frame.leadersToBlock[frame.func->addr()] = b;
            frame.func->_entry = b;
            // Seed element drives the worklist from the entry address.
            frame.seed = new ParseWorkElem(NULL,NULL,frame.func->addr(),true,false);
            frame.pushWork(frame.seed);
        } else {
            parsing_printf("[%s] failed to initialize parsing frame\n",
                           FILE__);
            return;
        }
        if (split) {
            // block_at split an existing block; notify the callback.
            _pcb.splitBlock(split,b);
        }
    }

    // FIXME these operations should move into the actual parsing
    Address ia_start = frame.func->addr();
    // Decode window runs from the entry address to the end of the region.
    unsigned size =
            frame.codereg->offset() + frame.codereg->length() - ia_start;
    const unsigned char* bufferBegin =
            (const unsigned char *)(frame.func->isrc()->getPtrToInstruction(ia_start));
    InstructionDecoder dec(bufferBegin,size,frame.codereg->getArch());
    // NB: b may be NULL here when the entry block already existed before
    // this call (the branch above was skipped).
    InstructionAdapter_t* ah = InstructionAdapter_t::makePlatformIA_IAPI(obj().cs()->getArch(),
                                                                         dec, ia_start, frame.func->obj(),
                                                                         frame.codereg, frame.func->isrc(), b);
    if(ah->isStackFramePreamble()) {
        frame.func->_no_stack_frame = false;
    }
    frame.func->_saves_fp = ah->savesFP();
    delete ah;
}
1063
1064 void ParseFrame::cleanup()
1065 {
1066     boost::lock_guard<ParseFrame> g(*this);
1067     for(unsigned i=0;i<work_bundles.size();++i)
1068         delete work_bundles[i];
1069     work_bundles.clear();
1070     if(seed)
1071         delete seed;
1072     seed = NULL;
1073 }
1074
1075 namespace {
1076     inline Edge * bundle_call_edge(ParseWorkBundle * b)
1077     {
1078         if(!b) return NULL;
1079
1080         vector<ParseWorkElem*> const& elems = b->elems();
1081         vector<ParseWorkElem*>::const_iterator it = elems.begin();
1082         for( ; it != elems.end(); ++it) {
1083             if((*it)->edge()->type() == CALL)
1084                 return (*it)->edge();
1085         }
1086         return NULL;
1087     }
1088
1089     /* 
1090      * Look up the next block for detection of straight-line
1091      * fallthrough edges into existing blocks.
1092      */
1093 }
1094
1095 void
1096 Parser::parse_frame(ParseFrame & frame, bool recursive) {
1097     bool already_processing = frame.inProcess.exchange(true);
1098     if(already_processing) return;
1099     boost::lock_guard<Function> g(*frame.func);
1100     /** Persistent intermediate state **/
1101     InstructionAdapter_t *ahPtr = NULL;
1102     ParseFrame::worklist_t & worklist = frame.worklist;
1103     dyn_hash_map<Address, Block *> & leadersToBlock = frame.leadersToBlock;
1104     Address & curAddr = frame.curAddr;
1105     Function * func = frame.func;
1106     dyn_hash_map<Address, bool> & visited = frame.visited;
1107     unsigned & num_insns = frame.num_insns;
1108     func->_cache_valid = false;
1109
1110     /** Non-persistent intermediate state **/
1111     Address nextBlockAddr;
1112     Block * nextBlock;
1113
1114     if (frame.status() == ParseFrame::UNPARSED) {
1115         parsing_printf("[%s] ==== starting to parse frame %lx ====\n",
1116                        FILE__,frame.func->addr());
1117         // prevents recursion of parsing
1118         frame.func->_parsed = true;
1119     } else {
1120         parsing_printf("[%s] ==== resuming parse of frame %lx ====\n",
1121                        FILE__,frame.func->addr());
1122         // Pull work that can be resumed off the delayedWork list
1123         std::map<ParseWorkElem *, Function *>::iterator iter;
1124
1125         vector<ParseWorkElem *> updated;
1126         vector<ParseWorkElem *>::iterator uIter;
1127
1128         for (iter = frame.delayedWork.begin();
1129              iter != frame.delayedWork.end();
1130              ++iter) {
1131             if (iter->second->retstatus() != UNSET) {
1132                 frame.pushWork(iter->first);
1133                 updated.push_back(iter->first);
1134             }
1135         }
1136         for (uIter = updated.begin(); uIter != updated.end(); ++uIter) {
1137             frame.delayedWork.erase(*uIter);
1138         }
1139     }
1140
1141
1142     frame.set_status(ParseFrame::PROGRESS);
1143
1144     while(!worklist.empty()) {
1145
1146         Block * cur = NULL;
1147         ParseWorkElem * work = frame.popWork();
1148         if (work->order() == ParseWorkElem::call) {
1149             Function * ct = NULL;
1150
1151             if (!work->callproc()) {
1152                 // If we're not doing recursive traversal, skip *all* of the call edge processing.
1153                 // We don't want to create a stub. We'll handle the assumption of a fallthrough
1154                 // target for the fallthrough work element, not the call work element. Catch blocks
1155                 // will get excluded but that's okay; our heuristic is not reliable enough that I'm willing to deploy
1156                 // it when we'd only be detecting a non-returning callee by name anyway.
1157                 // --BW 12/2012
1158                 if (!recursive) {
1159                     parsing_printf("[%s] non-recursive parse skipping call %lx->%lx\n",
1160                                    FILE__, work->edge()->src()->lastInsnAddr(), work->target());
1161                     continue;
1162                 }
1163
1164                 parsing_printf("[%s] binding call %lx->%lx\n",
1165                                FILE__,work->edge()->src()->lastInsnAddr(),work->target());
1166
1167                 pair<Function*,Edge*> ctp =
1168                         bind_call(frame,
1169                                   work->target(),
1170                                   work->edge()->src(),
1171                                   work->edge());
1172                 ct = ctp.first;
1173
1174                 work->mark_call();
1175             } else {
1176                 ct = _parse_data->findFunc(frame.codereg,work->target());
1177             }
1178
1179             if (recursive && ct &&
1180                 (frame_status(ct->region(),ct->addr())==ParseFrame::UNPARSED ||
1181                  frame_status(ct->region(),ct->addr())==ParseFrame::BAD_LOOKUP)) {
1182                 // suspend this frame and parse the next
1183                 parsing_printf("    [suspend frame %lx]\n", func->addr());
1184                 frame.call_target = ct;
1185                 frame.set_status(ParseFrame::CALL_BLOCKED);
1186                 // need to re-visit this edge
1187                 frame.pushWork(work);
1188                 if (ahPtr) delete ahPtr;
1189                 return;
1190             } else if (ct && work->tailcall()) {
1191                 // XXX The target has been or is currently being parsed (else
1192                 //     the previous conditional would have been taken),
1193                 //     so if its return status is unset then this
1194                 //     function has to take UNKNOWN
1195                 if (func->retstatus() != RETURN) {
1196                     if (ct->retstatus() > NORETURN)
1197                         func->set_retstatus(ct->retstatus());
1198                     else if (ct->retstatus() == UNSET)
1199                         func->set_retstatus(UNKNOWN);
1200                 }
1201             }
1202
1203             continue;
1204         } else if (work->order() == ParseWorkElem::call_fallthrough) {
1205             // check associated call edge's return status
1206             Edge * ce = bundle_call_edge(work->bundle());
1207             if (!ce) {
1208                 // odd; no call edge in this bundle
1209                 parsing_printf("[%s] unexpected missing call edge at %lx\n",
1210                                FILE__,work->edge()->src()->lastInsnAddr());
1211             } else {
1212                 // check for system call FT
1213                 Edge* edge = work->edge();
1214                 Block::Insns blockInsns;
1215 //                boost::lock_guard<Block> src_guard(*edge->src());
1216                 edge->src()->getInsns(blockInsns);
1217                 auto prev = blockInsns.rbegin();
1218                 InstructionAPI::Instruction prevInsn = prev->second;
1219                 bool is_nonret = false;
1220
1221                 if (prevInsn.getOperation().getID() == e_syscall) {
1222                     Address src = edge->src()->lastInsnAddr();
1223
1224
1225                     // Need to determine if system call is non-returning
1226                     long int syscallNumber;
1227                     if (!getSyscallNumber(func, edge->src(), src, frame.codereg->getArch(), syscallNumber)) {
1228                         // If we cannot retrieve a syscall number, assume the syscall returns
1229                         parsing_printf("[%s] could not retrieve syscall edge, assuming returns\n", FILE__);
1230                     } else {
1231                         if (obj().cs()->nonReturningSyscall(syscallNumber)) {
1232                             is_nonret = true;
1233                         }
1234                     }
1235
1236                     if (is_nonret) {
1237                         parsing_printf("[%s] no fallthrough for non-returning syscall\n",
1238                                        FILE__,
1239                                        work->edge()->src()->lastInsnAddr());
1240
1241                         // unlink tempsink fallthrough edge
1242                         Edge * remove = work->edge();
1243                         remove->src()->removeTarget(remove);
1244                         factory().destroy_edge(remove);
1245                         continue;
1246                     }
1247                 } else {
1248                     Address target = ce->trg()->start();
1249                     Function * ct = _parse_data->findFunc(frame.codereg,target);
1250                     bool is_plt = false;
1251
1252                     // check if associated call edge's return status is still unknown
1253                     if (ct && (ct->retstatus() == UNSET) ) {
1254                         // Delay parsing until we've finished the corresponding call edge
1255                         parsing_printf("[%s] Parsing FT edge %lx, corresponding callee (%s) return status unknown; delaying work\n",
1256                                        __FILE__,
1257                                        work->edge()->src()->lastInsnAddr(),
1258                                        ct->name().c_str());
1259
1260                         // Add work to ParseFrame's delayed list
1261                         frame.pushDelayedWork(work, ct);
1262
1263                         // Continue other work for this frame
1264                         continue;
1265                     }
1266
1267                     is_plt = HASHDEF(plt_entries,target);
1268
1269                     // CodeSource-defined tests 
1270                     is_nonret = obj().cs()->nonReturning(target);
1271                     if (is_nonret) {
1272                         parsing_printf("\t Disallowing FT edge: CodeSource reports nonreturning\n");
1273                     }
1274                     if (!is_nonret && is_plt) {
1275                         is_nonret |= obj().cs()->nonReturning(plt_entries[target]);
1276                         if (is_nonret) {
1277                             parsing_printf("\t Disallowing FT edge: CodeSource reports PLT nonreturning\n");
1278                         }
1279                     }
1280                     // Parsed return status tests
1281                     if (!is_nonret && !is_plt && ct) {
1282                         is_nonret |= (ct->retstatus() == NORETURN);
1283                         if (is_nonret) {
1284                             parsing_printf("\t Disallowing FT edge: function is non-returning\n");
1285                         }
1286                     }
1287                     // Call-stack tampering tests
1288                     if (unlikely(!is_nonret && frame.func->obj()->defensiveMode() && ct)) {
1289                         is_nonret |= (ct->retstatus() == UNKNOWN);
1290                         if (is_nonret) {
1291                             parsing_printf("\t Disallowing FT edge: function in "
1292                                                    "defensive binary may not return\n");
1293                             mal_printf("Disallowing FT edge: function %lx in "
1294                                                "defensive binary may not return\n", ct->addr());
1295                         } else {
1296                             StackTamper ct_tamper = ct->tampersStack();
1297                             is_nonret |= (TAMPER_NONZERO == ct_tamper);
1298                             is_nonret |= (TAMPER_ABS == ct_tamper);
1299                             if (is_nonret) {
1300                                 mal_printf("Disallowing FT edge: function at %lx "
1301                                                    "tampers with its stack\n", ct->addr());
1302                                 parsing_printf("\t Disallowing FT edge: function "
1303                                                        "tampers with its stack\n");
1304                             }
1305                         }
1306                     }
1307                     if (is_nonret) {
1308                         parsing_printf("[%s] no fallthrough for non-returning call "
1309                                                "to %lx at %lx\n",FILE__,target,
1310                                        work->edge()->src()->lastInsnAddr());
1311
1312                         // unlink tempsink fallthrough edge
1313                         Edge * remove = work->edge();
1314                         remove->src()->removeTarget(remove);
1315                         factory().destroy_edge(remove);
1316                         continue;
1317                     } else
1318                         // Invalidate cache_valid for all sharing functions
1319                         invalidateContainingFuncs(func, ce->src());
1320                 }
1321             }
1322         } else if (work->order() == ParseWorkElem::seed_addr) {
1323             cur = leadersToBlock[work->target()];
1324         } else if (work->order() == ParseWorkElem::resolve_jump_table) {
1325             // resume to resolve jump table
1326             auto work_ah = work->ah();
1327             parsing_printf("... continue parse indirect jump at %lx\n", work_ah->getAddr());
1328             Block *nextBlock = work->cur();
1329             if (nextBlock->last() != work_ah->getAddr()) {
1330                 // The block has been split
1331                 region_data * rd = _parse_data->findRegion(frame.codereg);
1332                 set<Block*> blocks;
1333                 rd->blocksByRange.find(work_ah->getAddr(), blocks);
1334                 for (auto bit = blocks.begin(); bit != blocks.end(); ++bit) {
1335                     if ((*bit)->last() == work_ah->getAddr()) {
1336                         nextBlock = (*bit);
1337                         break;
1338                     }
1339                 }
1340
1341             }
1342             ProcessCFInsn(frame,nextBlock,work->ah());
1343             continue;
1344         }
1345             // call fallthrough case where we have already checked that
1346             // the target returns. this is used in defensive mode.
1347         else if (work->order() == ParseWorkElem::checked_call_ft) {
1348             Edge* ce = bundle_call_edge(work->bundle());
1349             if (ce != NULL) {
1350                 invalidateContainingFuncs(func, ce->src());
1351             } else {
1352                 parsing_printf("[%s] unexpected missing call edge at %lx\n",
1353                                FILE__,work->edge()->src()->lastInsnAddr());
1354             }
1355         }
1356
1357         if (NULL == cur) {
1358             pair<Block*,Edge*> newedge =
1359                     add_edge(frame,
1360                              frame.func,
1361                              work->edge()->src(),
1362                              work->target(),
1363                              work->edge()->type(),
1364                              work->edge());
1365             cur = newedge.first;
1366         }
1367
1368         if (HASHDEF(visited,cur->start()))
1369         {
1370             parsing_printf("[%s] skipping locally parsed target at %lx\n",
1371                            FILE__,work->target());
1372             continue;
1373         }
1374         visited[cur->start()] = true;
1375         leadersToBlock[cur->start()] = cur;
1376
1377         if (!cur->_parsed)
1378         {
1379             parsing_printf("[%s] parsing block %lx\n",
1380                            FILE__,cur->start());
1381             if (frame.func->obj()->defensiveMode()) {
1382                 mal_printf("new block at %lx (0x%lx)\n",cur->start(), cur);
1383             }
1384
1385             cur->_parsed = true;
1386             curAddr = cur->start();
1387         } else {
1388             parsing_printf("[%s] deferring parse of shared block %lx\n",
1389                            FILE__,cur->start());
1390             if (func->retstatus() < UNKNOWN) {
1391                 // we've parsed into another function, if we've parsed
1392                 // into it's entry point, set retstatus to match it
1393                 Function * other_func = _parse_data->findFunc(
1394                         func->region(), cur->start());
1395                 if (other_func && other_func->retstatus() > UNKNOWN) {
1396                     func->set_retstatus(other_func->retstatus());
1397                 } else {
1398                     func->set_retstatus(UNKNOWN);
1399                 }
1400             }
1401             // The edge to this shared block is changed from
1402             // "going to sink" to going to this shared block.
1403             // This changes the function boundary, so we need to
1404             // invalidate the cache.
1405             func->_cache_valid = false;
1406             continue;
1407         }
1408
1409         /*
1410          * External consumers of parsing may have invoked finalizing
1411          * methods from callback routines. Ensure that the caches
1412          * are invalidated because a new block is being added to the
1413          * function's view
1414          */
1415         func->_cache_valid = false;
1416
1417         // NB Using block's region() here because it may differ from the
1418         //    function's if control flow has jumped to another region
1419         unsigned size =
1420                 cur->region()->offset() + cur->region()->length() - curAddr;
1421         const unsigned char* bufferBegin =
1422                 (const unsigned char *)(func->isrc()->getPtrToInstruction(curAddr));
1423         InstructionDecoder dec(bufferBegin,size,frame.codereg->getArch());
1424
1425         if (!ahPtr)
1426             ahPtr = InstructionAdapter_t::makePlatformIA_IAPI(func->obj()->cs()->getArch(), dec, curAddr, func->obj(),
1427                                                               cur->region(), func->isrc(), cur);
1428         else
1429             ahPtr->reset(dec,curAddr,func->obj(),
1430                          cur->region(), func->isrc(), cur);
1431
1432         InstructionAdapter_t * ah = ahPtr;
1433
1434         using boost::tuples::tie;
1435         tie(nextBlockAddr,nextBlock) = func->get_next_block(
1436                 frame.curAddr, frame.codereg);
1437
1438         bool isNopBlock = ah->isNop();
1439
1440         while(true) {
1441             curAddr = ah->getAddr();
1442             /** Check for straight-line fallthrough **/
1443             if (curAddr == nextBlockAddr) {
1444                 parsing_printf("[%s] straight-line parse into block at %lx\n",
1445                                FILE__,curAddr);
1446                 if (frame.func->obj()->defensiveMode()) {
1447                     mal_printf("straight-line parse into block at %lx\n",curAddr);
1448                 }
1449                 ah->retreat();
1450                 curAddr = ah->getAddr();
1451
1452                 end_block(cur,ahPtr);
1453                 pair<Block*,Edge*> newedge =
1454                         add_edge(frame,frame.func,cur,
1455                                  nextBlockAddr,FALLTHROUGH,NULL);
1456
1457                 if (!HASHDEF(visited,nextBlockAddr) &&
1458                     !HASHDEF(leadersToBlock,nextBlockAddr)) {
1459                     parsing_printf("[%s:%d] pushing %lx onto worklist\n",
1460                                    FILE__,__LINE__,nextBlockAddr);
1461
1462                     frame.pushWork(
1463                             frame.mkWork(
1464                                     NULL,
1465                                     newedge.second,
1466                                     nextBlockAddr,
1467                                     true,
1468                                     false)
1469                     );
1470                     /* preserved as example of leaky code; use mkWork instead
1471                     frame.pushWork(
1472                         new ParseWorkElem(
1473                             NULL,
1474                             newedge.second,
1475                             nextBlockAddr,
1476                             true,
1477                             false)
1478                         );
1479                     */
1480                     leadersToBlock[nextBlockAddr] = nextBlock;
1481                 }
1482                 break;
1483             } else if (curAddr > nextBlockAddr) {
1484                 parsing_printf("[%s:%d] inconsistent instruction stream: "
1485                                        "%lx is within [%lx,%lx)\n",
1486                                FILE__,__LINE__,curAddr,
1487                                nextBlock->start(),nextBlock->end());
1488
1489                 // NB "cur" hasn't ended, so its range may
1490                 // not look like it overlaps with nextBlock
1491                 _pcb.overlapping_blocks(cur,nextBlock);
1492
1493                 tie(nextBlockAddr,nextBlock) =
1494                         func->get_next_block(frame.curAddr, frame.codereg);
1495             }
1496
1497             // per-instruction callback notification
1498             ParseCallback::insn_details insn_det;
1499             insn_det.insn = ah;
1500
1501             parsing_printf("[%s:%d] curAddr 0x%lx: %s \n",
1502                            FILE__,__LINE__,curAddr, insn_det.insn->getInstruction().format().c_str() );
1503
1504             if (func->_is_leaf_function) {
1505                 Address ret_addr;
1506                 func->_is_leaf_function = !(insn_det.insn->isReturnAddrSave(ret_addr));
1507                 parsing_printf("[%s:%d] leaf %d funcname %s \n",
1508                                FILE__,__LINE__,func->_is_leaf_function, func->name().c_str());
1509                 if (!func->_is_leaf_function) func->_ret_addr = ret_addr;
1510             }
1511
1512             _pcb.instruction_cb(func,cur,curAddr,&insn_det);
1513
1514             if (isNopBlock && !ah->isNop()) {
1515                 ah->retreat();
1516
1517                 end_block(cur,ahPtr);
1518                 pair<Block*,Edge*> newedge =
1519                         add_edge(frame,frame.func,cur,curAddr,FALLTHROUGH,NULL);
1520                 Block * targ = newedge.first;
1521
1522                 parsing_printf("[%s:%d] nop-block ended at %lx\n",
1523                                FILE__,__LINE__,curAddr);
1524                 if (targ && !HASHDEF(visited,targ->start())) {
1525                     parsing_printf("[%s:%d] pushing %lx onto worklist\n",
1526                                    FILE__,__LINE__,targ->start());
1527
1528                     frame.pushWork(
1529                             frame.mkWork(
1530                                     NULL,
1531                                     newedge.second,
1532                                     targ->start(),
1533                                     true,
1534                                     false)
1535                     );
1536                     leadersToBlock[targ->start()] = targ;
1537                 }
1538                 break;
1539             }
1540
1541             /** Particular instruction handling (calls, branches, etc) **/
1542             ++num_insns;
1543
1544             if(ah->hasCFT()) {
1545                 if (ah->isIndirectJump()) {
1546                     // Create a work element to represent that
1547                     // we will resolve the jump table later
1548                     end_block(cur,ahPtr);
1549                     frame.pushWork( frame.mkWork( work->bundle(), cur, ahPtr));
1550                 } else {
1551                     ProcessCFInsn(frame,cur,ahPtr);
1552                 }
1553                 break;
1554             } else if (func->_saves_fp &&
1555                        func->_no_stack_frame &&
1556                        ah->isFrameSetupInsn()) { // isframeSetup is expensive
1557                 func->_no_stack_frame = false;
1558             } else if (ah->isLeave()) {
1559                 func->_no_stack_frame = false;
1560             } else if ( ah->isAbort() ) {
1561                 // 4. `abort-causing' instructions
1562                 end_block(cur,ahPtr);
1563                 //link(cur, sink_block, DIRECT, true);
1564                 break;
1565             } else if ( ah->isInvalidInsn() ) {
1566                 // 4. Invalid or `abort-causing' instructions
1567                 end_block(cur,ahPtr);
1568                 link(cur, Block::sink_block, DIRECT, true);
1569                 break;
1570             } else if ( ah->isInterruptOrSyscall() ) {
1571                 // 5. Raising instructions
1572                 end_block(cur,ahPtr);
1573
1574                 pair<Block*,Edge*> newedge =
1575                         add_edge(frame,frame.func,cur,ah->getNextAddr(),FALLTHROUGH,NULL);
1576                 Block * targ = newedge.first;
1577
1578                 if (targ && !HASHDEF(visited,targ->start()) &&
1579                     !HASHDEF(leadersToBlock,targ->start())) {
1580                     parsing_printf("[%s:%d] pushing %lx onto worklist\n",
1581                                    FILE__,__LINE__,targ->start());
1582
1583                     frame.pushWork(
1584                             frame.mkWork(
1585                                     NULL,
1586                                     newedge.second,
1587                                     targ->start(),
1588                                     true,
1589                                     false)
1590                     );
1591                     leadersToBlock[targ->start()] = targ;
1592                 }
1593                 if (unlikely(func->obj()->defensiveMode())) {
1594                     fprintf(stderr,"parsed bluepill insn sysenter or syscall "
1595                             "in defensive mode at %lx\n",curAddr);
1596                 }
1597                 break;
1598             } else if (unlikely(func->obj()->defensiveMode())) {
1599                 if (!_pcb.hasWeirdInsns(func) && ah->isGarbageInsn()) {
1600                     // add instrumentation at this addr so we can
1601                     // extend the function if this really executes
1602                     ParseCallback::default_details det(
1603                             (unsigned char*) cur->region()->getPtrToInstruction(cur->lastInsnAddr()),
1604                             cur->end() - cur->lastInsnAddr(),
1605                             true);
1606                     _pcb.abruptEnd_cf(cur->lastInsnAddr(),cur,&det);
1607                     _pcb.foundWeirdInsns(func);
1608                     end_block(cur,ahPtr);
1609                     // allow invalid instructions to end up as a sink node.
1610                     link(cur, Block::sink_block, DIRECT, true);
1611                     break;
1612                 } else if (ah->isNopJump()) {
1613                     // patch the jump to make it a nop, and re-set the 
1614                     // instruction adapter so we parse the instruction
1615                     // as a no-op this time, allowing the subsequent
1616                     // instruction to be parsed correctly
1617                     mal_printf("Nop jump at %lx, changing it to nop\n",ah->getAddr());
1618                     _pcb.patch_nop_jump(ah->getAddr());
1619                     unsigned bufsize =
1620                             func->region()->offset() + func->region()->length() - ah->getAddr();
1621                     func->region()->offset() + func->region()->length() - ahPtr->getAddr();
1622                     const unsigned char* bufferBegin = (const unsigned char *)
1623                             (func->isrc()->getPtrToInstruction(ah->getAddr()));
1624                     dec = InstructionDecoder
1625                             (bufferBegin, bufsize, frame.codereg->getArch());
1626                     ah->reset(dec, curAddr, func->obj(),
1627                               func->region(), func->isrc(), cur);
1628                 } else {
1629                     entryID id = ah->getInstruction().getOperation().getID();
1630                     switch (id) {
1631                         case e_rdtsc:
1632                             fprintf(stderr,"parsed bluepill insn rdtsc at %lx\n",curAddr);
1633                             break;
1634                         case e_sldt:
1635                             fprintf(stderr,"parsed bluepill insn sldt at %lx\n",curAddr);
1636                             break;
1637                         default:
1638                             break;
1639                     }
1640                 }
1641             } else {
1642                 // default
1643             }
1644
1645             /** Check for overruns of valid address space **/
1646             if (!is_code(func,ah->getNextAddr())) {
1647                 parsing_printf("[%s] next insn %lx is invalid\n",
1648                                FILE__,ah->getNextAddr());
1649
1650                 end_block(cur,ahPtr);
1651                 // We need to tag the block with a sink edge
1652                 link(cur, Block::sink_block, DIRECT, true);
1653                 break;
1654             } else if (!cur->region()->contains(ah->getNextAddr())) {
1655                 parsing_printf("[%s] next address %lx is outside [%lx,%lx)\n",
1656                                FILE__,ah->getNextAddr(),
1657                                cur->region()->offset(),
1658                                cur->region()->offset()+cur->region()->length());
1659                 end_block(cur,ahPtr);
1660                 // We need to tag the block with a sink edge
1661                 link(cur, Block::sink_block, DIRECT, true);
1662                 break;
1663             }
1664             ah->advance();
1665         }
1666     }
1667     if (ahPtr) delete ahPtr;
1668     // Check if parsing is complete
1669     if (!frame.delayedWork.empty()) {
1670         frame.set_status(ParseFrame::FRAME_DELAYED);
1671         return;
1672     }
1673
1674     /** parsing complete **/
1675     if (HASHDEF(plt_entries,frame.func->addr())) {
1676 //        if (obj().cs()->nonReturning(frame.func->addr())) {
1677         if (obj().cs()->nonReturning(plt_entries[frame.func->addr()])) {
1678             frame.func->set_retstatus(NORETURN);
1679         } else {
1680             frame.func->set_retstatus(UNKNOWN);
1681         }
1682
1683         // Convenience -- adopt PLT name
1684         frame.func->_name = plt_entries[frame.func->addr()];
1685     } else if (frame.func->retstatus() == UNSET) {
1686         frame.func->set_retstatus(NORETURN);
1687     }
1688
1689     frame.set_status(ParseFrame::PARSED);
1690
1691     if (unlikely(obj().defensiveMode())) {
1692         // calculate this after setting the function to PARSED, so that when
1693         // we finalize the function we'll actually save the results and won't
1694         // re-finalize it
1695         func->tampersStack();
1696     }
1697     _pcb.newfunction_retstatus( func );
1698 }
1699
void
Parser::end_block(Block * b, InstructionAdapter_t * ah)
{
    // Close out block b at the adapter's current position: the current
    // instruction becomes the block's last instruction, and the block's
    // end address becomes the address immediately following it.
    b->_lastInsn = ah->getAddr();
    b->updateEnd(ah->getNextAddr());

//    record_block(b);
}
1708
1709 void
1710 Parser::record_block(Block *b)
1711 {
1712     parsing_printf("[%s:%d] recording block [%lx,%lx)\n",
1713                    FILE__,__LINE__,b->start(),b->end());
1714     _parse_data->record_block(b->region(),b);
1715 }
1716
1717
1718 Block *
1719 Parser::block_at(
1720         Function * owner,
1721         Address addr,
1722         Block * & split)
1723 {
1724 //    ScopeLock<Mutex<true> > l(work_mutex);
1725
1726     Block * exist = NULL;
1727     Block * ret = NULL;
1728     Block * inconsistent = NULL;
1729     Address prev_insn = 0;
1730
1731     split = NULL;
1732     CodeRegion *cr;
1733     if(owner->region()->contains(addr))
1734         cr = owner->region();
1735     else
1736         cr = _parse_data->reglookup(owner->region(),addr);
1737
1738
1739     if(!is_code(owner,addr)) {
1740         parsing_printf("[%s] block address %lx rejected by isCode()\n",
1741                        FILE__,addr);
1742         return NULL;
1743     }
1744
1745
1746     {
1747 #ifdef ENABLE_RACE_DETECTION
1748     // this lock causes deadlock when running in parallel, but it is
1749     // useful for suppressing unimportant races on the iterator
1750     boost::lock_guard<Function> g(*owner);
1751 #endif
1752     for(auto i = owner->blocks_begin();
1753         i != owner->blocks_end();
1754         ++i)
1755     {
1756         if((*i)->start() == addr) return *i;
1757         if((*i)->start() < addr && addr < (*i)->end()) {
1758             return split_block(owner, *i, addr, prev_insn);
1759         }
1760     }
1761     return _cfgfact._mkblock(owner, cr, addr);
1762     }
1763 }
1764
1765 pair<Block *,Edge *>
1766 Parser::add_edge(
1767         ParseFrame & frame,
1768         Function * owner,
1769         Block * src,
1770         Address dst,
1771         EdgeTypeEnum et,
1772         Edge * exist)
1773 {
1774     // adjust anything pointing to zero to force it to sink
1775     if(dst == 0) { dst = -1; }
1776     Block * split = NULL;
1777     Block * ret = NULL;
1778     Edge * newedge = NULL;
1779     pair<Block *, Edge *> retpair((Block *) NULL, (Edge *) NULL);
1780
1781     if(!is_code(owner,dst)) {
1782         parsing_printf("[%s] target address %lx rejected by isCode()\n",dst);
1783         return retpair;
1784     }
1785
1786     ret = block_at(owner,dst,split);
1787     retpair.first = ret;
1788
1789     if(split == src) {
1790         // special case -- same block
1791         src = ret;
1792     }
1793
1794     if(split && HASHDEF(frame.visited,split->start())) {
1795         // prevent "delayed parsing" of extant block if 
1796         // this frame has already visited it
1797         frame.visited[ret->start()] = true;
1798         frame.leadersToBlock[ret->start()] = ret;
1799     }
1800
1801     if(NULL == exist) {
1802         newedge = link(src,ret,et,false);
1803         retpair.second = newedge;
1804     } else {
1805         relink(exist,src,ret);
1806         retpair.second = exist;
1807     }
1808
1809     return retpair;
1810 }
1811
Block *
Parser::split_block(
        Function * owner,
        Block *b,
        Address addr,
        Address previnsn)
{
    // Split block `b` at `addr`: a new block covering [addr, b->end())
    // is created and inherits all of b's outgoing edges; b is then
    // fall-through linked into it. Returns the new second-half block.
    // `previnsn` is currently unused (see the commented-out
    // updateBlockEnd call below).
    Block * ret;
    CodeRegion * cr;
    bool isRetBlock = false;
    // Resolve the region owning b; it may differ from owner's region.
    if(owner->region()->contains(b->start()))
        cr = owner->region();
    else
        cr = _parse_data->reglookup(owner->region(),b->start());
    region_data * rd = _parse_data->findRegion(cr);

    // enable for extra-safe testing, but callers are responsible
    // assert(b->consistent(addr));

    ret = factory()._mkblock(owner,cr,addr);

    // move out edges

    {

        // Transfer every outgoing edge of b to the new block.
        Block::edgelist &trgs = b->_trglist;
        Block::edgelist::iterator tit = trgs.begin();
        for (; tit != trgs.end(); ++tit) {
            Edge *e = *tit;
            e->_source = ret;
            ret->_trglist.push_back(e);
        }
        // Remember whether b ended in a return: owners' cached return
        // blocks must be flushed below if so.
        if (!trgs.empty() && RET == (*trgs.begin())->type()) {
            isRetBlock = true;
        }
        trgs.clear();
        // The new block adopts b's end, last-instruction address, and
        // parsed state; b then falls through into it.
        ret->updateEnd(b->end());
        ret->_lastInsn = b->_lastInsn;
        ret->_parsed = true;
        link(b,ret,FALLTHROUGH,false);

        record_block(ret);

        // b's range has changed
        //rd->updateBlockEnd(b, addr, previnsn);
        // Any functions holding b that have already been finalized
        // need to have their caches invalidated so that they will
        // find out that they have this new 'ret' block
        std::set<Function*> prev_owners;
        rd->findFuncs(b->start(),prev_owners);
        for(std::set<Function*>::iterator oit = prev_owners.begin();
            oit != prev_owners.end(); ++oit)
        {
            Function * po = *oit;
            if (po->_cache_valid) {
                po->_cache_valid = false;
                parsing_printf("[%s:%d] split of [%lx,%lx) invalidates cache of "
                                       "func at %lx\n",
                               FILE__,__LINE__,b->start(),b->end(),po->addr());
            }
            if (isRetBlock) {
                po->_retBL.clear(); //could remove b from the vector instead of clearing it, not sure what's cheaper
            }
        }
        // KEVINTODO: study performance impact of this callback
        _pcb.splitBlock(b,ret);
    }

    return ret;
}
1882
1883
1884 pair<Function *,Edge*>
1885 Parser::bind_call(ParseFrame & frame, Address target, Block * cur, Edge * exist)
1886 {
1887     Function * tfunc = NULL;
1888     Block * tblock = NULL;
1889     FuncSource how = RT;
1890     if(frame.func->_src == GAP || frame.func->_src == GAPRT)
1891         how = GAPRT;
1892
1893     // look it up
1894     tfunc = _parse_data->get_func(frame.codereg,target,how);
1895     if(!tfunc) {
1896         parsing_printf("[%s:%d] can't bind call to %lx\n",
1897                        FILE__,__LINE__,target);
1898         return pair<Function*,Edge*>((Function *) NULL,exist);
1899     }
1900     _pcb.discover_function(tfunc);
1901
1902     // add an edge
1903     pair<Block*,Edge*> ret = add_edge(frame,tfunc,cur,target,CALL,exist);
1904     tblock = ret.first;
1905     if(!tblock) {
1906         parsing_printf("[%s:%d] can't bind call to %lx\n",
1907                        FILE__,__LINE__,target);
1908         return pair<Function*,Edge*>((Function *) NULL,exist);
1909     }
1910
1911     return pair<Function*,Edge*>(tfunc,ret.second);
1912 }
1913
1914 Function *
1915 Parser::findFuncByEntry(CodeRegion *r, Address entry)
1916 {
1917     if(_parse_state < PARTIAL) {
1918         parsing_printf("[%s:%d] Parser::findFuncByEntry([%lx,%lx),%lx) "
1919                                "forced parsing\n",
1920                        FILE__,__LINE__,r->low(),r->high(),entry);
1921         parse();
1922     }
1923     return _parse_data->findFunc(r,entry);
1924 }
1925
1926 int
1927 Parser::findFuncs(CodeRegion *r, Address addr, set<Function *> & funcs)
1928 {
1929     if(_parse_state < COMPLETE) {
1930         parsing_printf("[%s:%d] Parser::findFuncs([%lx,%lx),%lx,...) "
1931                                "forced parsing\n",
1932                        FILE__,__LINE__,r->low(),r->high(),addr);
1933         parse();
1934     }
1935     if(_parse_state < FINALIZED) {
1936         parsing_printf("[%s:%d] Parser::findFuncs([%lx,%lx),%lx,...) "
1937                                "forced finalization\n",
1938                        FILE__,__LINE__,r->low(),r->high(),addr);
1939         finalize();
1940     }
1941     return _parse_data->findFuncs(r,addr,funcs);
1942 }
1943
1944 int
1945 Parser::findFuncs(CodeRegion *r, Address start, Address end, set<Function *> & funcs)
1946 {
1947     if(_parse_state < COMPLETE) {
1948         parsing_printf("[%s:%d] Parser::findFuncs([%lx,%lx),%lx,%lx) "
1949                                "forced parsing\n",
1950                        FILE__,__LINE__,r->low(),r->high(),start,end);
1951         parse();
1952     }
1953     if(_parse_state < FINALIZED) {
1954         parsing_printf("[%s:%d] Parser::findFuncs([%lx,%lx),%lx,%lx) "
1955                                "forced finalization\n",
1956                        FILE__,__LINE__,r->low(),r->high(),start,end);
1957         finalize();
1958     }
1959     return _parse_data->findFuncs(r,start,end,funcs);
1960 }
1961
1962 Block *
1963 Parser::findBlockByEntry(CodeRegion *r, Address entry)
1964 {
1965     if(_parse_state < PARTIAL) {
1966         parsing_printf("[%s:%d] Parser::findBlockByEntry([%lx,%lx),%lx) "
1967                                "forced parsing\n",
1968                        FILE__,__LINE__,r->low(),r->high(),entry);
1969         parse();
1970     }
1971     return _parse_data->findBlock(r,entry);
1972 }
1973
1974 Block *
1975 Parser::findNextBlock(CodeRegion *r, Address addr)
1976 {
1977     if(_parse_state < PARTIAL) {
1978         parsing_printf("[%s:%d] Parser::findBlockByEntry([%lx,%lx),%lx) "
1979                                "forced parsing\n",
1980                        FILE__,__LINE__,r->low(),r->high(),addr);
1981         parse();
1982     }
1983     return _parse_data->findRegion(r)->get_next_block(addr).second;
1984 }
1985
1986 int
1987 Parser::findBlocks(CodeRegion *r, Address addr, set<Block *> & blocks)
1988 {
1989     if(_parse_state < COMPLETE) {
1990         parsing_printf("[%s:%d] Parser::findBlocks([%lx,%lx),%lx,...) "
1991                                "forced parsing\n",
1992                        FILE__,__LINE__,r->low(),r->high(),addr);
1993         parse();
1994     }
1995     return _parse_data->findBlocks(r,addr,blocks);
1996 }
1997
1998 // find blocks without parsing.
1999 int Parser::findCurrentBlocks(CodeRegion* cr, Address addr,
2000                               std::set<Block*>& blocks) {
2001     return _parse_data->findBlocks(cr, addr, blocks);
2002 }
2003
2004 int Parser::findCurrentFuncs(CodeRegion * cr, Address addr, std::set<Function*> &funcs) {
2005     return _parse_data->findFuncs(cr, addr, funcs);
2006 }
2007
2008 Edge*
2009 Parser::link(Block *src, Block *dst, EdgeTypeEnum et, bool sink)
2010 {
2011     assert(et != NOEDGE);
2012     Edge * e = factory()._mkedge(src,dst,et);
2013     e->_type._sink = sink;
2014     src->addTarget(e);
2015     dst->addSource(e);
2016     _pcb.addEdge(src, e, ParseCallback::target);
2017     _pcb.addEdge(dst, e, ParseCallback::source);
2018     return e;
2019 }
2020
2021 /* 
2022  * During parsing, all edges are temporarily linked to the _tempsink
2023  * block. Adding and then removing edges to and from this block is
2024  * wasteful, especially given that removal is an O(N) operation with
2025  * vector storage. This call thus does not record edges into the sink.
2026  * These edges are guaranteed to have been relinked when parsing is
2027  * in state COMPLETE.
2028  *
2029  * NB This introduces an inconsistency into the CFG invariant that all
2030  *    targets of an edge have that edge in their source list if the
2031  *    data structures are queried during parsing. Extenders of 
2032  *    parsing callbacks are the only ones who can see this state;
2033  *    they have been warned and should know better.
2034  */
2035 Edge*
2036 Parser::link_tempsink(Block *src, EdgeTypeEnum et)
2037 {
2038     Edge * e = factory()._mkedge(src,Block::sink_block,et);
2039     e->_type._sink = true;
2040     src->_trglist.push_back(e);
2041     return e;
2042 }
2043
2044 void
2045 Parser::relink(Edge * e, Block *src, Block *dst)
2046 {
2047     assert(e);
2048     bool addSrcAndDest = true;
2049     if(src != e->src()) {
2050         assert(e);
2051         e->src()->removeTarget(e);
2052         _pcb.removeEdge(e->src(), e, ParseCallback::target);
2053         assert(e);
2054         e->_source = src;
2055         src->addTarget(e);
2056         _pcb.addEdge(src, e, ParseCallback::target);
2057         addSrcAndDest = false;
2058     }
2059     if(dst != e->trg()) {
2060         assert(e);
2061         if(e->trg() != Block::sink_block) {
2062             assert(e);
2063             if(e->trg()) {
2064                 e->trg()->removeSource(e);
2065             }
2066             _pcb.removeEdge(e->trg(), e, ParseCallback::source);
2067             addSrcAndDest = false;
2068         }
2069         dst->addSource(e);
2070         _pcb.addEdge(dst, e, ParseCallback::source);
2071         if (addSrcAndDest) {
2072             // We're re-linking a sinkEdge to be a non-sink edge; since 
2073             // we don't inform PatchAPI of temporary sinkEdges, we have
2074             // to add both the source AND target edges
2075             _pcb.addEdge(src, e, ParseCallback::target);
2076         }
2077     }
2078
2079     e->_type._sink = (dst == Block::sink_block);
2080 }
2081
2082 ParseFrame::Status
2083 Parser::frame_status(CodeRegion * cr, Address addr)
2084 {
2085     // XXX parsing frames may have been cleaned up, but status
2086     //     is always cached
2087     return _parse_data->frameStatus(cr,addr);
2088 }
2089
2090 void
2091 Parser::remove_func(Function *func)
2092 {
2093     if (sorted_funcs.end() != sorted_funcs.find(func)) {
2094         sorted_funcs.erase(func);
2095     }
2096     if (HINT == func->src()) {
2097         for (unsigned fidx=0; fidx < hint_funcs.size(); fidx++) {
2098             if (hint_funcs[fidx] == func) {
2099                 hint_funcs[fidx] = hint_funcs[hint_funcs.size()-1];
2100                 hint_funcs.pop_back();
2101                 break;
2102             }
2103         }
2104     }
2105     else {
2106         for (unsigned fidx=0; fidx < discover_funcs.size(); fidx++) {
2107             if (discover_funcs[fidx] == func) {
2108                 discover_funcs[fidx] = discover_funcs[discover_funcs.size()-1];
2109                 discover_funcs.pop_back();
2110                 break;
2111             }
2112         }
2113     }
2114
2115     _parse_data->remove_func(func);
2116 }
2117
void
Parser::remove_block(Dyninst::ParseAPI::Block *block)
{
    // Delete `block` from the parse data's bookkeeping.
    // NOTE(review): boost::adopt_lock means this guard does NOT acquire
    // the ParseData lock -- it assumes the caller already holds it and
    // only releases it on scope exit. Confirm all callers uphold that
    // contract.
    boost::lock_guard<ParseData> g(*_parse_data, boost::adopt_lock);
    _parse_data->remove_block(block);
}
2124
void Parser::move_func(Function *func, Address new_entry, CodeRegion *new_reg)
{
    // Re-home `func` in the address->function lookup tables: remove its
    // entry from the old region's map and insert it under `new_entry`
    // in `new_reg`'s map. Only the lookup tables are touched here; the
    // Function object itself is not modified.
    region_data *reg_data = _parse_data->findRegion(func->region());

    race_detector_fake_lock_acquire(race_detector_fake_lock(reg_data->funcsByAddr));
    {
      // The accessor holds a per-bucket lock on the concurrent map for
      // the duration of each find/erase/insert below.
      tbb::concurrent_hash_map<Address, Function*>::accessor a;
      if(reg_data->funcsByAddr.find(a, func->addr()))
        {
          reg_data->funcsByAddr.erase(a);
        }
      // NOTE(review): reg_data is reassigned here, so the fake-lock
      // release below targets the NEW region's map while the acquire
      // above targeted the old one -- confirm this pairing is intended.
      reg_data = _parse_data->findRegion(new_reg);
      reg_data->funcsByAddr.insert(a, make_pair(new_entry, func));
    }
    race_detector_fake_lock_release(race_detector_fake_lock(reg_data->funcsByAddr));
}
2141
2142 void Parser::invalidateContainingFuncs(Function *owner, Block *b)
2143 {
2144     CodeRegion * cr;
2145     if(owner->region()->contains(b->start()))
2146         cr = owner->region();
2147     else
2148         cr = _parse_data->reglookup(owner->region(),b->start());
2149     region_data * rd = _parse_data->findRegion(cr);
2150
2151
2152     // Any functions holding b that have already been finalized
2153     // need to have their caches invalidated so that they will
2154     // find out that they have this new 'ret' block
2155     std::set<Function*> prev_owners;
2156     rd->findFuncs(b->start(),prev_owners);
2157     for(std::set<Function*>::iterator oit = prev_owners.begin();
2158         oit != prev_owners.end(); ++oit)
2159     {
2160         Function * po = *oit;
2161         po->_cache_valid = false;
2162         parsing_printf("[%s:%d] split of [%lx,%lx) invalidates cache of "
2163                                "func at %lx\n",
2164                        FILE__,__LINE__,b->start(),b->end(),po->addr());
2165     }
2166 }
2167
2168 /* Add ParseFrames waiting on func back to the work queue */
2169 void Parser::resumeFrames(Function * func, LockFreeQueue<ParseFrame *> & work)
2170 {
2171     // If we do not know the function's return status, don't put its waiters back on the worklist
2172     if (func->retstatus() == UNSET) {
2173         parsing_printf("[%s] %s return status unknown, cannot resume waiters\n",
2174                        __FILE__,
2175                        func->name().c_str());
2176         return;
2177     }
2178     boost::lock_guard<DelayedFrames> g(delayed_frames);
2179
2180     // When a function's return status is set, all waiting frames back into the worklist
2181     map<Function *, set<ParseFrame *> >::iterator iter = delayed_frames.frames.find(func);
2182     if (iter == delayed_frames.frames.end()) {
2183         // There were no frames waiting, ignore
2184         parsing_printf("[%s] %s return status %d, no waiters\n",
2185                        __FILE__,
2186                        func->name().c_str(),
2187                        func->retstatus());
2188         return;
2189     } else {
2190         parsing_printf("[%s] %s return status %d, undelaying waiting functions\n",
2191                        __FILE__,
2192                        func->name().c_str(),
2193                        func->retstatus());
2194         // Add each waiting frame back to the worklist
2195         set<ParseFrame *> vec = iter->second;
2196         for (set<ParseFrame *>::iterator fIter = vec.begin();
2197              fIter != vec.end();
2198              ++fIter) {
2199             work.insert(*fIter);
2200         }
2201         // remove func from delayedFrames map
2202         delayed_frames.frames.erase(func);
2203     }
2204 }
2205
2206 bool Parser::getSyscallNumber(Function * /*func*/,
2207                               Block * block,
2208                               Address /*addr*/,
2209                               Architecture arch,
2210                               long int & val)
2211 {
2212     val = -1;
2213
2214     // In the common case, the setup of system call number
2215     // is a mov in the previous instruction. We don't currently look elsewhere.
2216     // In the future, could use slicing and symeval to find the value of
2217     // this register at the system call (as unstrip does).
2218     Block::Insns blockInsns;
2219     block->getInsns(blockInsns);
2220     auto prevPair = ++(blockInsns.rbegin());
2221     InstructionAPI::Instruction prevInsn = prevPair->second;
2222     if (prevInsn.getOperation().getID() != e_mov) {
2223         return false;
2224     }
2225
2226     MachRegister syscallNumberReg = MachRegister::getSyscallNumberReg(arch);
2227     if (syscallNumberReg == InvalidReg) {
2228         return false;
2229     }
2230     InstructionAPI::RegisterAST* regAST = new InstructionAPI::RegisterAST(syscallNumberReg);
2231     InstructionAPI::RegisterAST::Ptr regASTPtr = InstructionAPI::RegisterAST::Ptr(regAST);
2232
2233     std::vector<InstructionAPI::Operand> operands;
2234     prevInsn.getOperands(operands);
2235     for (unsigned i = 0; i < operands.size(); i++) {
2236         if (!operands[i].isWritten(regASTPtr)) {
2237             InstructionAPI::Expression::Ptr value = operands[i].getValue();
2238             InstructionAPI::Result res = value->eval();
2239             if (res.defined) {
2240                 val = res.convert<Offset>();
2241             }
2242         }
2243     }
2244
2245     return (val != -1);
2246 }
2247