adjust switches to control cilk vs. openmp support
[dyninst.git] / parseAPI / src / Parser.C
1 /*
2  * See the dyninst/COPYRIGHT file for copyright information.
3  * 
4  * We provide the Paradyn Tools (below described as "Paradyn")
5  * on an AS IS basis, and do not warrant its validity or performance.
6  * We reserve the right to update, modify, or discontinue this
7  * software at any time.  We shall have no obligation to supply such
8  * updates or modifications or any other form of support to you.
9  * 
10  * By your use of Paradyn, you understand and agree that we (or any
11  * other person or entity with proprietary rights in Paradyn) are
12  * under no obligation to provide either maintenance services,
13  * update services, notices of latent defects, or correction of
14  * defects for Paradyn.
15  * 
16  * This library is free software; you can redistribute it and/or
17  * modify it under the terms of the GNU Lesser General Public
18  * License as published by the Free Software Foundation; either
19  * version 2.1 of the License, or (at your option) any later version.
20  * 
21  * This library is distributed in the hope that it will be useful,
22  * but WITHOUT ANY WARRANTY; without even the implied warranty of
23  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
24  * Lesser General Public License for more details.
25  * 
26  * You should have received a copy of the GNU Lesser General Public
27  * License along with this library; if not, write to the Free Software
28  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
29  */
30
31 #include <omp.h>
32
33 #include <vector>
34 #include <limits>
35
36 // For Mutex
37 #define PROCCONTROL_EXPORTS
38
39 #include "dyntypes.h"
40
41 #include "CodeObject.h"
42 #include "CFGFactory.h"
43 #include "ParseCallback.h"
44 #include "Parser.h"
45 #include "Parser.h"
46 #include "CFG.h"
47 #include "util.h"
48 #include "debug_parse.h"
49
50 #include <boost/tuple/tuple.hpp>
51 #include <boost/bind/bind.hpp>
52
53 #include <boost/thread/thread.hpp>
54
55 #include <boost/timer/timer.hpp>
56 #include <fstream>
57
58 #ifdef ENABLE_RACE_DETECTION
59 #define USE_CILK 1
60 #else
61 #define USE_OPENMP 1
62 #endif
63
64 #if USE_CILK
65 #include <cilk/cilk.h>
66 #include <cilk/cilk_api.h>
67 #endif
68
69 #include "race-detector-annotations.h"
70
71 using namespace std;
72 using namespace Dyninst;
73 using namespace Dyninst::ParseAPI;
74 using namespace Dyninst::InstructionAPI;
75
76 typedef std::pair< Address, EdgeTypeEnum > edge_pair_t;
77 typedef vector< edge_pair_t > Edges_t;
78
79 #include "common/src/dthread.h"
80
81 namespace {
82     struct less_cr {
83         bool operator()(CodeRegion * x, CodeRegion * y)
84         {
85             return x->offset() < y->offset();
86         }
87     };
88 }
89
90 Parser::Parser(CodeObject & obj, CFGFactory & fact, ParseCallbackManager & pcb) :
91         _obj(obj),
92         _cfgfact(fact),
93         _pcb(pcb),
94         _parse_data(NULL),
95         _parse_state(UNPARSED),
96         _in_parse(false),
97         _in_finalize(false)
98 {
99     delayed_frames.size = 0;
100     // cache plt entries for fast lookup
101     const map<Address, string> & lm = obj.cs()->linkage();
102     map<Address, string>::const_iterator lit = lm.begin();
103     for( ; lit != lm.end(); ++lit) {
104         parsing_printf("Cached PLT entry %s (%lx)\n",lit->second.c_str(),lit->first);
105         plt_entries[lit->first] = lit->second;
106     }
107
108     if(obj.cs()->regions().empty()) {
109         parsing_printf("[%s:%d] CodeSource provides no CodeRegions"
110                                " -- unparesable\n",
111                        FILE__,__LINE__);
112         _parse_state = UNPARSEABLE;
113         return;
114     }
115
116     // check whether regions overlap
117     vector<CodeRegion *> const& regs = obj.cs()->regions();
118     vector<CodeRegion *> copy(regs.begin(),regs.end());
119     sort(copy.begin(),copy.end(),less_cr());
120
121     // allocate a sink block -- region is arbitrary
122
123     bool overlap = false;
124     CodeRegion * prev = copy[0], *cur = NULL;
125     for(unsigned i=1;i<copy.size();++i) {
126         cur = copy[i];
127         if(cur->offset() < prev->offset() + prev->length()) {
128             parsing_printf("Overlapping code regions [%lx,%lx) and [%lx,%lx)\n",
129                            prev->offset(),prev->offset()+prev->length(),
130                            cur->offset(),cur->offset()+cur->length());
131             overlap = true;
132             break;
133         }
134     }
135
136     if(overlap)
137         _parse_data = new OverlappingParseData(this,copy);
138     else
139         _parse_data = new StandardParseData(this);
140 }
141
// Destructor: frames that ended in an error state may still hold working
// data, so cleanup() runs unconditionally.
ParseFrame::~ParseFrame()
{
    cleanup(); // error'd frames still need cleanup
}
146
147 Parser::~Parser()
148 {
149     if(_parse_data)
150         delete _parse_data;
151
152     for(auto fit = frames.begin() ; fit != frames.end(); ++fit)
153         delete *fit;
154
155     frames.clear();
156 }
157
// Register a hint function with the parser, unless a function is already
// recorded at the same (region, entry address) pair.
void
Parser::add_hint(Function * f)
{
    if(!_parse_data->findFunc(f->region(),f->addr()))
        record_func(f);
}
164
165 template <typename T>
166 std::string pair_to_string(T pr)
167 {
168     std::stringstream s;
169     s << pr.first << ", " << pr.second;
170     return s.str();
171 }
172
173
// Top-level parse driver: parses all hinted functions (parse_vanilla) and
// finalizes the CFG. Idempotent — once the parser reaches COMPLETE (or is
// UNPARSEABLE) the call returns immediately.
void
Parser::parse()
{
    parsing_printf("[%s:%d] parse() called on Parser %p with state %d\n",
                   FILE__,__LINE__,this, _parse_state);

    // For modification: once we've full-parsed once, don't do it again
    if (_parse_state >= COMPLETE) return;

    if(_parse_state == UNPARSEABLE)
        return;

    // Serialize whole-object parsing; _in_parse is informational only
    // (the assert below is disabled).
    ScopeLock<Mutex<true> > L(parse_mutex);
//    assert(!_in_parse);
    _in_parse = true;

    parse_vanilla();
    finalize();
    // anything else by default...?

    if(_parse_state < COMPLETE)
        _parse_state = COMPLETE;

    _in_parse = false;
    parsing_printf("[%s:%d] parsing complete for Parser %p with state %d\n", FILE__, __LINE__, this, _parse_state);
#ifdef ADD_PARSE_FRAME_TIMERS
    // Dump the per-frame timing histogram collected in ProcessOneFrame.
    // NOTE(review): the histogram buckets appear to be milliseconds (see
    // ProcessOneFrame's ns/1e6 conversion) though this header says "usecs"
    // — confirm.
    std::ofstream stat_log("functions.csv");
    stat_log << "Results for " << time_histogram.size() << " buckets\n";
    stat_log << "usecs,count\n";
    std::transform(time_histogram.begin(), time_histogram.end(),
                   std::ostream_iterator<std::string >(stat_log, "\n"),
                   pair_to_string<std::pair<unsigned int, unsigned int> >);
#endif
}
208
// Parse starting at a specific target address within a known region.
// Creates (or reuses) the function and its parse frame, then drives
// parse_frames. `recursive` controls whether callees are parsed too;
// `src` records how this function was discovered.
void
Parser::parse_at(
        CodeRegion * region,
        Address target,
        bool recursive,
        FuncSource src)
{
    Function *f;
    ParseFrame *pf;
    vector<ParseFrame *> work;

    parsing_printf("[%s:%d] entered parse_at([%lx,%lx),%lx)\n",
                   FILE__,__LINE__,region->low(),region->high(),target);

    if(!region->contains(target)) {
        parsing_printf("\tbad address, bailing\n");
        return;
    }

    if(_parse_state < PARTIAL)
        _parse_state = PARTIAL;

    f = _parse_data->get_func(region,target,src);
    if(!f) {
        parsing_printf("   could not create function at %lx\n",target);
        return;
    }

    // Skip functions that have already been (or are being) parsed.
    ParseFrame::Status exist = _parse_data->frameStatus(region,target);
    if(exist != ParseFrame::BAD_LOOKUP && exist != ParseFrame::UNPARSED) {
        parsing_printf("   function at %lx already parsed, status %d\n",
                       target, exist);
        return;
    }

    // Create and record a frame if one does not already exist for target.
    if(!(pf = _parse_data->findFrame(region,target))) {
        pf = new ParseFrame(f,_parse_data);
        init_frame(*pf);
        frames.insert(pf);
        _parse_data->record_frame(pf);
    }

    work.push_back(pf);
    parse_frames(work,recursive);

    // downgrade state if necessary
    if(_parse_state > COMPLETE)
        _parse_state = COMPLETE;

}
259
260 void
261 Parser::parse_at(Address target, bool recursive, FuncSource src)
262 {
263     CodeRegion * region = NULL;
264
265     parsing_printf("[%s:%d] entered parse_at(%lx)\n",FILE__,__LINE__,target);
266
267     if(_parse_state == UNPARSEABLE)
268         return;
269
270     StandardParseData * spd = dynamic_cast<StandardParseData *>(_parse_data);
271     if(!spd) {
272         parsing_printf("   parse_at is invalid on overlapping regions\n");
273         return;
274     }
275
276     region = spd->reglookup(region,target); // input region ignored for SPD
277     if(!region) {
278         parsing_printf("   failed region lookup at %lx\n",target);
279         return;
280     }
281
282     parse_at(region,target,recursive,src);
283 }
284
285 void
286 Parser::parse_vanilla()
287 {
288     ParseFrame *pf;
289     vector<ParseFrame *> work;
290     vector<Function *>::iterator fit;
291
292     parsing_printf("[%s:%d] entered parse_vanilla()\n",FILE__,__LINE__);
293     parsing_printf("\t%d function hints\n",hint_funcs.size());
294
295     if(_parse_state < PARTIAL)
296         _parse_state = PARTIAL;
297     else
298         parsing_printf("\tparse state is %d, some parsing already done\n",
299                        _parse_state);
300
301     /* Initialize parse frames from hints */
302     for(fit=hint_funcs.begin();fit!=hint_funcs.end();++fit) {
303         Function * hf = *fit;
304         ParseFrame::Status test = frame_status(hf->region(),hf->addr());
305         if(test != ParseFrame::BAD_LOOKUP &&
306            test != ParseFrame::UNPARSED)
307         {
308             parsing_printf("\tskipping repeat parse of %lx [%s]\n",
309                            hf->addr(),hf->name().c_str());
310             continue;
311         }
312
313         pf = new ParseFrame(hf,_parse_data);
314         init_frame(*pf);
315         frames.insert(pf);
316         work.push_back(pf);
317         _parse_data->record_frame(pf);
318     }
319
320     parse_frames(work,true);
321 }
322
// Parse a batch of externally-supplied work elements (edges), grouping them
// into per-source-block parse frames and then driving those frames.
// Used to resume parsing from specific edges (e.g. after instrumentation
// discovers new control flow).
void
Parser::parse_edges( vector< ParseWorkElem * > & work_elems )
{
    if(_parse_state == UNPARSEABLE)
        return;

    // build up set of needed parse frames and load them with work elements
    set<ParseFrame*> frameset; // for dup checking
    vector<ParseFrame*> frames;

    for (unsigned idx=0; idx < work_elems.size(); idx++) {

        ParseWorkElem *elem = work_elems[idx];
        Block *src = elem->edge()->src();

        if (elem->order() == ParseWorkElem::call_fallthrough)
        {
            // A call-fallthrough element needs its companion CALL edge in the
            // same bundle; scan the source block's out-edges for one,
            // preferring a non-sink CALL edge over a sink edge.
            Edge *callEdge = NULL;
            boost::lock_guard<Block> g(*src);
            Block::edgelist trgs = src->targets();
            for (Block::edgelist::iterator eit = trgs.begin();
                 eit != trgs.end();
                 eit++)
            {
                if ((*eit)->type() == CALL) {
                    callEdge = *eit;
                    if (!(*eit)->sinkEdge()) // if it's a sink edge, look for nonsink CALL
                        break;
                }
            }
            // create a call work elem so that the bundle is complete
            // and set the target function's return status and 
            // tamper to RETURN and TAMPER_NONE, respectively
            //assert(callEdge);

            // When we have a direct call to unallocated memory or garbage
            // we there is no call edge when the call is first executed. I'm
            // not sure why not, so I'm attempting to continue past here without
            // fixing up the called function...
            // Also, in the case I saw, the callee was _not_ returning. Not directly. It 
            // led into a big case with a longjmp() equivalent. 

            if (callEdge)
            {
                bool isResolvable = false;
                Address callTarget = 0;
                if ( ! callEdge->sinkEdge() )
                {
                    isResolvable = true;
                    callTarget = callEdge->trg()->start();
                    // the call target may be in another Code Object
                    Function *callee = callEdge->trg()->obj()->findFuncByEntry(
                            callEdge->trg()->region(), callTarget);
                    assert(callee);
                    callee->set_retstatus(RETURN);
                    callee->_tamper = TAMPER_NONE;
                }
                elem->bundle()->add(new ParseWorkElem
                                            ( elem->bundle(),
                                              callEdge,
                                              callTarget,
                                              isResolvable,
                                              false ));
            }
        }
        // Find (or create) the parse frame for the block containing the
        // edge's source instruction.
        ParseFrame *frame = _parse_data->findFrame
                ( src->region(),
                  src->lastInsnAddr() );
        bool isNewFrame = false;
        if (!frame) {
            vector<Function*> funcs;
            src->getFuncs(funcs);
            frame = new ParseFrame(*funcs.begin(),_parse_data);
            for (unsigned fix=1; fix < funcs.size(); fix++) {
                // if the block is shared, all of its funcs need
                // to add the new edge
                funcs[fix]->_cache_valid = false;
            }
            isNewFrame = true;
        }

        // push before frame init so no seed is added
        if (elem->bundle()) {
            frame->work_bundles.push_back(elem->bundle());
        }
        frame->pushWork(elem);
        if (isNewFrame) {
            init_frame(*frame);
        }

        // Deduplicate frames while preserving first-seen order.
        if (frameset.end() == frameset.find(frame)) {
            frameset.insert(frame);
            frames.push_back(frame);
        }
    }
    ScopeLock<Mutex<true> > L(parse_mutex);
    // now parse
    if(_parse_state < PARTIAL)
        _parse_state = PARTIAL;
    _in_parse = true;

    parse_frames( frames, true );

    if(_parse_state > COMPLETE)
        _parse_state = COMPLETE;
    _in_parse = false;

    finalize();

}
433
434
// Parse a single frame and return any new frames its parsing uncovered.
// A frame is only processed if it has a function and is not already claimed
// by another thread (swap_busy provides the mutual exclusion).
vector<ParseFrame*> Parser::ProcessOneFrame(ParseFrame* pf, bool recursive) {
  vector<ParseFrame*> more_frames;
  if (pf->func && !pf->swap_busy(true)) {
#ifdef ADD_PARSE_FRAME_TIMERS
    boost::timer::cpu_timer t;
    t.start();
#endif
    parse_frame(*pf,recursive);
#ifdef ADD_PARSE_FRAME_TIMERS
    t.stop();
    // NOTE(review): elapsed().wall is nanoseconds, so dividing by 1e6 yields
    // milliseconds despite the variable name; the CSV header written in
    // parse() says "usecs" — confirm which unit is intended.
    unsigned int msecs = floor(t.elapsed().wall / 1000000.0);
    race_detector_fake_lock_acquire(race_detector_fake_lock(time_histogram));
    {
      tbb::concurrent_hash_map<unsigned int, unsigned int>::accessor a;
      time_histogram.insert(a, msecs);
      ++(a->second);
    }
    race_detector_fake_lock_release(race_detector_fake_lock(time_histogram));
#endif
    more_frames = postProcessFrame(pf, recursive);

    // exclusive access to each ParseFrame is mediated by marking a frame busy.
    // we clear evidence of our access to the ParseFrame here because a concurrent
    // thread may try to touch it because of duplicates in the work list. that 
    // won't actually be concurrent because of swap_busy. we suppress the race 
    // report by scrubbing information about our access.
    race_detector_forget_access_history(pf, sizeof(*pf));

    pf->swap_busy(false);
  }
  return more_frames;
}
467
// Examine a frame after parse_frame has run and translate its status into
// follow-up work: new frames to parse (returned), delayed-frame bookkeeping,
// or cleanup. Holds the frame's lock for the duration.
vector<ParseFrame *> Parser::postProcessFrame(ParseFrame *pf, bool recursive) {
    vector<ParseFrame*> work;
    boost::lock_guard<ParseFrame> g(*pf);
    switch(pf->status()) {
        // Frame stopped at a call whose target must be parsed first:
        // requeue this frame and create/queue a frame for the callee.
        case ParseFrame::CALL_BLOCKED: {
            parsing_printf("[%s] frame %lx blocked at %lx\n",
                           FILE__,pf->func->addr(),pf->curAddr);
            {

                assert(pf->call_target);

                parsing_printf("    call target %lx\n",pf->call_target->addr());
                work.push_back(pf);

                CodeRegion * cr = pf->call_target->region();
                Address targ = pf->call_target->addr();
                ParseFrame * tf = _parse_data->findFrame(cr,targ);
                if(!tf)
                {
                    // sanity
                    if(_parse_data->frameStatus(cr,targ) == ParseFrame::PARSED)
                        assert(0);

                    tf = new ParseFrame(pf->call_target,_parse_data);
                    init_frame(*tf);
                    frames.insert(tf);
                    _parse_data->record_frame(tf);
                }
                if(likely(recursive))
                    work.push_back(tf);
                else {
                    // CALL_BLOCKED should not occur in non-recursive parsing.
                    assert(0);
                    // XXX should never get here
                    //parsing_printf("    recursive parsing disabled\n");
                }

            }
            break;
        }
        // Frame finished: handle defensive-mode stack tampering, wake any
        // frames waiting on this function, and release working data.
        case ParseFrame::PARSED:{
            parsing_printf("[%s] frame %lx complete, return status: %d\n",
                           FILE__,pf->func->addr(),pf->func->retstatus());

            if (unlikely(_obj.defensiveMode() &&
                         TAMPER_NONE != pf->func->tampersStack() &&
                         TAMPER_NONZERO != pf->func->tampersStack() ))
            {   // may adjust CALL_FT targets, add a frame to the worklist,
                // or trigger parsing in a separate target CodeObject
                tamper_post_processing(work,pf);
            }

            /* add waiting frames back onto the worklist */
            resumeFrames(pf->func, work);

            pf->cleanup();
            break;
        }        
        case ParseFrame::FRAME_ERROR:
            parsing_printf("[%s] frame %lx error at %lx\n",
                           FILE__,pf->func->addr(),pf->curAddr);
            break;
        // Frame is waiting on the return status of other functions: record
        // each dependency in the global delayed_frames map so the frame can
        // be resumed when a callee's status is resolved.
        case ParseFrame::FRAME_DELAYED:
        {
            boost::lock_guard<DelayedFrames> g(delayed_frames);
            parsing_printf("[%s] frame %lx delayed at %lx\n",
                                                                                  FILE__,
                                                                                  pf->func->addr(),
                                                                                  pf->curAddr);

            if (pf->delayedWork.size()) {
                // Add frame to global delayed list

                for (auto iter = pf->delayedWork.begin();
                     iter != pf->delayedWork.end();
                     ++iter) {

                    Function * ct = iter->second;
                    parsing_printf("[%s] waiting on %s\n",
                                   __FILE__,
                                   ct->name().c_str());
                    {

                        auto fIter = delayed_frames.frames.find(ct);
                        if (fIter == delayed_frames.frames.end()) {
                            set<ParseFrame *> waiters;
                            waiters.insert(pf);
                            delayed_frames.frames[ct] = waiters;
                        } else {
                            delayed_frames.frames[ct].insert(pf);
                        }
                    }
                }
            } else {
                // We shouldn't get here
                assert(0 && "Delayed frame with no delayed work");
            }

            /* if the return status of this function has been updated, add
             * waiting frames back onto the work list */
            resumeFrames(pf->func, work);
            break;
        }
        case ParseFrame::PROGRESS:
            // another thread is working on this already; possibly something else unblocked.
            break;

        default:
            assert(0 && "invalid parse frame status");
    }
    return work;

}
580
581
582 static void
583 InsertFrames
584 (
585  vector<ParseFrame *> *frames, 
586  LockFreeQueue<ParseFrame *> *q
587 )
588 {
589   if (frames->size() > 0) {
590     LockFreeQueue<ParseFrame *> myq;
591     for (size_t j = 0; j < frames->size(); ++j) {
592       ParseFrame *f = (*frames)[j]; 
593       if (f) myq.insert(f);
594     }
595     q->splice(myq);
596   }
597 }
598
599
600 void
601 Parser::SpawnProcessFrame
602 (
603  ParseFrame *pf, 
604  bool recursive, 
605  LockFreeQueue<ParseFrame *> *work_queue,
606  std::atomic<int> *in_progress
607 )
608 {
609   vector<ParseFrame*> new_frames = ProcessOneFrame(pf, recursive);
610   InsertFrames(&new_frames, work_queue);
611   in_progress->fetch_add(-1);
612 }
613
614
// Drain the frame work queue, dispatching each frame to SpawnProcessFrame.
// Three variants are compiled depending on the threading runtime:
// OpenMP tasks, Cilk (cilk_for), or a serial fallback. In every variant
// `in_progress` counts frames currently being parsed so the loop can tell
// "queue empty and done" from "queue empty but workers may still add frames".
void
Parser::ProcessFrames
(
 vector<ParseFrame *> *work, 
 bool recursive 
)
{
  LockFreeQueue<ParseFrame *> work_queue;
  InsertFrames(work, &work_queue);
  std::atomic<int> in_progress(0);
#if USE_OPENMP
#pragma omp parallel shared(work_queue, in_progress)
  {
    int nthreads = omp_get_num_threads();
    // Only the master thread steals from the shared queue; it spawns an
    // OpenMP task per frame and idles (via taskwait on small thread counts)
    // until the queue is empty and no tasks remain in flight.
#pragma omp master
    for (;;) {
      if (work_queue.peek() == 0) {
        if (in_progress.load() == 0) break;
        if (nthreads < 10) {
#pragma omp taskwait
        }
      } else {
        LockFreeQueue<ParseFrame *> private_queue(work_queue.steal());
        for(;;) {
          LockFreeQueueItem<ParseFrame *> *first = private_queue.pop();
          if (first == 0) break;
          ParseFrame *frame = first->value();
          delete first;
          in_progress.fetch_add(1);
#pragma omp task shared(work_queue, in_progress)
          SpawnProcessFrame(frame, recursive, &work_queue, &in_progress);
        }
      }
    }
  }
#elif USE_CILK
#if 0
  // Disabled cilk_spawn-based variant, kept for reference.
  {
    int nthreads = __cilkrts_get_nworkers(); 
    for (;;) {
      if (work_queue.peek() == 0) {
        if (in_progress.load() == 0) break;
        if (nthreads < 10) {
          cilk_sync;
        }
      } else {
        LockFreeQueue<ParseFrame *> private_queue(work_queue.steal());
        for(;;) {
          LockFreeQueueItem<ParseFrame *> *first = private_queue.pop();
          if (first == 0) break;
          ParseFrame *frame = first->value();
          delete first;
          in_progress.fetch_add(1);
          cilk_spawn SpawnProcessFrame(frame, recursive, &work_queue, &in_progress);
        }
      }
    }
  }
#else
// Active Cilk variant: snapshot the queue into a vector and process the
// batch with cilk_for, repeating until no new frames are produced.
{
    int nthreads = __cilkrts_get_nworkers(); 
    for (;;) {
      if (work_queue.peek() == 0) break;
      vector <ParseFrame *> pfv;
      std::copy(work_queue.begin(), work_queue.end(), std::back_inserter(pfv));
      work_queue.clear();
      cilk_for (int i = 0; i < pfv.size(); i++) {
        ParseFrame *frame = pfv[i];
        in_progress.fetch_add(1);
        SpawnProcessFrame(frame, recursive, &work_queue, &in_progress);
      }
    }
  }
#endif
#else
  // Serial fallback: same steal/pop structure as the OpenMP variant but
  // frames are processed inline.
  {
    for (;;) {
      if (work_queue.peek() == 0) {
        if (in_progress.load() == 0) break;
      } else {
        LockFreeQueue<ParseFrame *> private_queue(work_queue.steal());
        for(;;) {
          LockFreeQueueItem<ParseFrame *> *first = private_queue.pop();
          if (first == 0) break;
          ParseFrame *frame = first->value();
          delete first;
          in_progress.fetch_add(1);
          SpawnProcessFrame(frame, recursive, &work_queue, &in_progress);
        }
      }
    }
  }
#endif
}
709
710
711 void
712 Parser::parse_frames(vector<ParseFrame *> & work, bool recursive)
713 {
714     ProcessFrames(&work, recursive);
715     work.clear();
716
717     bool done = false, cycle = false;
718     {
719         boost::lock_guard<DelayedFrames> g(delayed_frames);
720         if(delayed_frames.frames.empty()) {
721             parsing_printf("[%s] Fixed point reached (0 funcs with unknown return status)\n)",
722                            __FILE__);
723             delayed_frames.size = 0;
724             done = true;
725         } else if(delayed_frames.size == delayed_frames.frames.size()) {
726             cycle = true;
727         }
728     }
729
730     // Use fixed-point to ensure we parse frames whose parsing had to be delayed
731     if (!done) {
732         if(cycle) {
733             processCycle(work,recursive);
734         } else {
735             processFixedPoint(work, recursive);
736         }
737     }
738
739     cleanup_frames();
740 }
741
742 void Parser::processFixedPoint(vector<ParseFrame *> &work, bool recursive) {// We haven't yet reached a fixedpoint; let's recurse
743     {
744         boost::lock_guard<DelayedFrames> g(delayed_frames);
745
746         parsing_printf("[%s] Fixed point not yet reached (%d funcs with unknown return status)\n",
747                        __FILE__,
748                        delayed_frames.frames.size());
749
750         // Update delayed_frames.size for next iteration
751         delayed_frames.size = delayed_frames.frames.size();
752
753         // Check if we can resume any frames yet
754         vector<Function *> updated;
755         for (auto iter = delayed_frames.frames.begin();
756              iter != delayed_frames.frames.end();
757              ++iter) {
758             if (iter->first->retstatus() != UNSET) {
759                 updated.push_back(iter->first);
760             }
761         }
762
763         if (updated.size()) {
764             for (auto uIter = updated.begin();
765                  uIter != updated.end();
766                  ++uIter) {
767                 resumeFrames((*uIter), work);
768             }
769         }
770     }
771     // Recurse through parse_frames
772     parsing_printf("[%s] Calling parse_frames again, work.size() = %d\n",
773                    __FILE__,
774                    work.size());
775     parse_frames(work, recursive);
776 }
777
778 void Parser::processCycle(vector<ParseFrame *> &work, bool recursive) {// If we've reached a fixedpoint and have remaining frames, we must
779     // have a cyclic dependency
780     vector<Function *> updated;
781     {
782         boost::lock_guard<DelayedFrames> g(delayed_frames);
783         parsing_printf("[%s] Fixed point reached (%d funcs with unknown return status)\n",
784                        __FILE__,
785                        delayed_frames.frames.size());
786
787         // Mark UNSET functions in cycle as NORETURN
788         // except if we're doing non-recursive parsing.
789         // If we're just parsing one function, we want
790         // to mark everything RETURN instead.
791         for (auto iter = delayed_frames.frames.begin(); iter != delayed_frames.frames.end(); ++iter) {
792             Function * func = iter->first;
793             if (func->retstatus() == UNSET) {
794                 if(recursive)
795                 {
796                     func->set_retstatus(NORETURN);
797                     func->obj()->cs()->incrementCounter(PARSE_NORETURN_HEURISTIC);
798                 }
799                 else
800                 {
801                     func->set_retstatus(RETURN);
802                 }
803                 updated.push_back(func);
804             }
805
806             set<ParseFrame *> vec = iter->second;
807             for (auto vIter = vec.begin(); vIter != vec.end(); ++vIter) {
808                 Function * delayed = (*vIter)->func;
809                 if (delayed->retstatus() == UNSET) {
810                     delayed->set_retstatus(NORETURN);
811                     delayed->obj()->cs()->incrementCounter(PARSE_NORETURN_HEURISTIC);
812                     updated.push_back(func);
813                 }
814             }
815         }
816
817     }
818
819     // We should have updated the return status of one or more frames; recurse
820     if (updated.size()) {
821         for (auto uIter = updated.begin();
822              uIter != updated.end();
823              ++uIter) {
824             resumeFrames((*uIter), work);
825         }
826
827         if (work.size()) {
828             parsing_printf("[%s] Updated retstatus of delayed frames, trying again; work.size() = %d\n",
829                            __FILE__,
830                            work.size());
831             parse_frames(work, recursive);
832         }
833     } else {
834         // We shouldn't get here
835         parsing_printf("[%s] No more work can be done (%d funcs with unknown return status)\n",
836                        __FILE__,
837                        delayed_frames.frames.size());
838         assert(0);
839     }
840 }
841
842 void Parser::cleanup_frames()  {
843   vector <ParseFrame *> pfv;
844   std::copy(frames.begin(), frames.end(), std::back_inserter(pfv));
845 #if USE_OPENMP
846 #pragma omp parallel for schedule(auto)
847   for (int i = 0; i < pfv.size(); i++) {
848     ParseFrame *pf = pfv[i];
849     if (pf) {
850       _parse_data->remove_frame(pf);
851       delete pf;
852     }
853   }
854   frames.clear();
855 #elif USE_CILK
856   cilk_for(unsigned i=0; i < pfv.size(); ++i) {
857     ParseFrame *pf = pfv[i];
858     if (pf) {
859       _parse_data->remove_frame(pf);
860       delete pf;
861     }
862   }
863 #else 
864   for(unsigned i=0; i < pfv.size(); ++i) {
865     ParseFrame *pf = pfv[i];
866     if (pf) {
867       _parse_data->remove_frame(pf);
868       delete pf;
869     }
870   }
871 #endif
872   frames.clear();
873 }
874
875 /* Finalizing all functions for consumption:
876   
877    - Finish delayed parsing
878    - Prepare and record FuncExtents for range-based lookup
879 */
880
881 // Could set cache_valid depending on whether the function is currently
882 // being parsed somewhere. 
883
// Finalize a single block for `owner`: detect blocks overlapping its start
// address, split where an existing block's instruction boundaries permit,
// and notify the callback manager. Called during function finalization.
void Parser::finalize_block(Block* b, Function* owner)
{
    Address addr = b->start();
    Address prev_insn = b->lastInsnAddr();
    set<Block *> overlap;
    CodeRegion *cr;
    // Resolve the region containing the block; usually the owner's region.
    if(owner->region()->contains(addr))
        cr = owner->region();
    else
        cr = _parse_data->reglookup(owner->region(),addr);

    Block* exist = _parse_data->findBlock(cr, addr);
    if(NULL == exist)
    {
        // No block starts exactly at addr; look for blocks whose range
        // overlaps addr instead.
        _parse_data->findBlocks(cr,addr,overlap);
        if(overlap.size() > 1)
            parsing_printf("[%s] address %lx overlapped by %d blocks\n",
                           FILE__,addr,overlap.size());

        /* Platform specific consistency test:
           generally checking for whether the address is an
           instruction boundary in a block */
        for(set<Block *>::iterator sit=overlap.begin();sit!=overlap.end();++sit)
        {
            Block * ob = *sit;

            // consistent will fill in prev_insn with the address of the
            // instruction preceeding addr if addr is consistent
            if(ob->consistent(addr,prev_insn)) {
                exist = b;
                break;
            } else {
                // addr falls mid-instruction in ob: report the conflict.
                parsing_printf("[%s] %lx is inconsistent with [%lx,%lx)\n",
                               FILE__,addr,b->start(),b->end());
                _pcb.overlapping_blocks(ob,b);
            }
        }

    }
    if(exist) {
        // A (consistent) block covers addr but starts earlier: split it.
        if(exist->start() != addr) {
            Block* ret = split_block(owner, b, addr, prev_insn);
            record_block(ret);
        }
    }
    _pcb.addBlock(owner, b);

}
932
// Finalize a single function: finish any delayed parsing, then build
// and publish the function's FuncExtents for range-based lookup.
// Serialized by finalize_mutex; a no-op when the function's cache is
// already valid.
void
Parser::finalize(Function *f)
{
    boost::unique_lock<boost::mutex> finalize_write_lock(finalize_mutex);
    if(f->_cache_valid) {
        return;
    }

    // If the function body was never parsed (e.g., only recorded as a
    // hint), force a parse now so finalization sees real blocks.
    if(!f->_parsed) {
        parsing_printf("[%s:%d] Parser::finalize(f[%lx]) "
                               "forced parsing\n",
                       FILE__,__LINE__,f->addr());
        parse_at(f->addr(), true, f->src());
    }

    bool cache_value = true;
    /* this is commented out to prevent a failure in tampersStack, but
           this may be an incorrect approach to fixing the problem.
    if(frame_status(f->region(), f->addr()) < ParseFrame::PARSED) {
        // XXX prevent caching of blocks, extents for functions that
        // are actively being parsed. This prevents callbacks and other
        // functions called from within, e.g. parse_frame from setting
        // the caching flag and preventing later updates to the blocks()
        // vector during finalization.
        cache_value = false;
    }*/

    parsing_printf("[%s] finalizing %s (%lx)\n",
                   FILE__,f->name().c_str(),f->addr());

    region_data * rd = _parse_data->findRegion(f->region());
    assert(rd);

    // finish delayed parsing and sorting
    Function::blocklist blocks = f->blocks_int();

    // is this the first time we've parsed this function?
    // If not, drop previously published extents before rebuilding.
    if (unlikely( !f->_extents.empty() )) {
        _parse_data->remove_extents(f->_extents);
        f->_extents.clear();
    }


    if(blocks.empty()) {
        f->_cache_valid = cache_value; // see above
        return;
    }

    // Walk the sorted block list, coalescing contiguous blocks into
    // FuncExtents: a gap between the previous block's end and the next
    // block's start closes the current extent and opens a new one.
    auto bit = blocks.begin();
    FuncExtent * ext = NULL;
    Address ext_s = (*bit)->start();
    Address ext_e = ext_s;

    for( ; bit != blocks.end(); ++bit) {
        Block * b = *bit;
        finalize_block(b, f);
        if(b->start() > ext_e) {
            ext = new FuncExtent(f,ext_s,ext_e);

            // remove access history for ext before publishing it
            // to concurrent readers to avoid false race reports.
            // ext is written before it is published and only read
            // thereafter.
            race_detector_forget_access_history(ext, sizeof(*ext));

            parsing_printf("%lx extent [%lx,%lx)\n",f->addr(),ext_s,ext_e);
            f->_extents.push_back(ext);
            rd->funcsByRange.insert(ext);
            ext_s = b->start();
        }
        ext_e = b->end();
    }
    // Publish the final (or only) extent.
    ext = new FuncExtent(f,ext_s,ext_e);
 
    // remove access history for ext before publishing it
    // to concurrent readers to avoid false race reports.
    // ext is written before it is published and only read
    // thereafter.
    race_detector_forget_access_history(ext, sizeof(*ext));

    parsing_printf("%lx extent [%lx,%lx)\n",f->addr(),ext_s,ext_e);
    rd->funcsByRange.insert(ext);
    f->_extents.push_back(ext);

    f->_cache_valid = cache_value; // see comment at function entry

    if (unlikely( f->obj()->defensiveMode())) {
        // add fallthrough edges for calls assumed not to be returning
        // whose fallthrough blocks we parsed anyway (this happens if 
        // the FT block is also a branch target)
        Function::edgelist & edges = f->_call_edge_list;
        for (Function::edgelist::iterator eit = edges.begin();
             eit != edges.end();
             eit++)
        {
            boost::lock_guard<Block> g(*(*eit)->src());
            if (2 > (*eit)->src()->targets().size()) {
                Block *ft = _parse_data->findBlock((*eit)->src()->region(),
                                                   (*eit)->src()->end());
                if (ft && HASHDEF(f->_bmap,ft->start())) {
                    link((*eit)->src(),ft,CALL_FT,false);
                }
            }
        }
    }
}
1039
1040 void
1041 Parser::finalize()
1042 {
1043     if(_parse_state < FINALIZED) {
1044         finalize_funcs(hint_funcs);
1045         finalize_funcs(discover_funcs);
1046         _parse_state = FINALIZED;
1047     }
1048 }
1049
1050 void
1051 Parser::finalize_funcs(vector<Function *> &funcs)
1052 {
1053     vector<Function *>::iterator fit = funcs.begin();
1054     for( ; fit != funcs.end(); ++fit) {
1055         finalize(*fit);
1056     }
1057 }
1058
1059 void
1060 Parser::record_func(Function *f)
1061 {
1062     if(!f) return;
1063
1064     if(f->src() == HINT)
1065         hint_funcs.push_back(f);
1066     else
1067         discover_funcs.push_back(f);
1068
1069     sorted_funcs.insert(f);
1070
1071     _parse_data->record_func(f);
1072 }
1073
1074 void
1075 Parser::init_frame(ParseFrame & frame)
1076 {
1077     boost::lock_guard<ParseFrame> g(frame);
1078     Block * b = NULL;
1079     Block * split = NULL;
1080
1081     if ( ! frame.func->_entry )
1082     {
1083         // Find or create a block
1084         b = block_at(frame.func, frame.func->addr(),split);
1085         if(b) {
1086             frame.leadersToBlock[frame.func->addr()] = b;
1087             frame.func->_entry = b;
1088             frame.seed = new ParseWorkElem(NULL,NULL,frame.func->addr(),true,false);
1089             frame.pushWork(frame.seed);
1090         } else {
1091             parsing_printf("[%s] failed to initialize parsing frame\n",
1092                            FILE__);
1093             return;
1094         }
1095         if (split) {
1096             _pcb.splitBlock(split,b);
1097         }
1098     }
1099
1100     // FIXME these operations should move into the actual parsing
1101     Address ia_start = frame.func->addr();
1102     unsigned size =
1103             frame.codereg->offset() + frame.codereg->length() - ia_start;
1104     const unsigned char* bufferBegin =
1105             (const unsigned char *)(frame.func->isrc()->getPtrToInstruction(ia_start));
1106     InstructionDecoder dec(bufferBegin,size,frame.codereg->getArch());
1107     InstructionAdapter_t* ah = InstructionAdapter_t::makePlatformIA_IAPI(obj().cs()->getArch(),
1108                                                                          dec, ia_start, frame.func->obj(),
1109                                                                          frame.codereg, frame.func->isrc(), b);
1110     if(ah->isStackFramePreamble()) {
1111         frame.func->_no_stack_frame = false;
1112     }
1113     frame.func->_saves_fp = ah->savesFP();
1114     delete ah;
1115 }
1116
1117 void ParseFrame::cleanup()
1118 {
1119     boost::lock_guard<ParseFrame> g(*this);
1120     for(unsigned i=0;i<work_bundles.size();++i)
1121         delete work_bundles[i];
1122     work_bundles.clear();
1123     if(seed)
1124         delete seed;
1125     seed = NULL;
1126 }
1127
1128 namespace {
1129     inline Edge * bundle_call_edge(ParseWorkBundle * b)
1130     {
1131         if(!b) return NULL;
1132
1133         vector<ParseWorkElem*> const& elems = b->elems();
1134         vector<ParseWorkElem*>::const_iterator it = elems.begin();
1135         for( ; it != elems.end(); ++it) {
1136             if((*it)->edge()->type() == CALL)
1137                 return (*it)->edge();
1138         }
1139         return NULL;
1140     }
1141
1142     /* 
1143      * Look up the next block for detection of straight-line
1144      * fallthrough edges into existing blocks.
1145      */
1146 }
1147
1148 void
1149 Parser::parse_frame(ParseFrame & frame, bool recursive) {
1150     boost::lock_guard<Function> g(*frame.func);
1151     /** Persistent intermediate state **/
1152     InstructionAdapter_t *ahPtr = NULL;
1153     ParseFrame::worklist_t & worklist = frame.worklist;
1154     dyn_hash_map<Address, Block *> & leadersToBlock = frame.leadersToBlock;
1155     Address & curAddr = frame.curAddr;
1156     Function * func = frame.func;
1157     dyn_hash_map<Address, bool> & visited = frame.visited;
1158     unsigned & num_insns = frame.num_insns;
1159     func->_cache_valid = false;
1160
1161     /** Non-persistent intermediate state **/
1162     Address nextBlockAddr;
1163     Block * nextBlock;
1164
1165     if (frame.status() == ParseFrame::UNPARSED) {
1166         parsing_printf("[%s] ==== starting to parse frame %lx ====\n",
1167                        FILE__,frame.func->addr());
1168         // prevents recursion of parsing
1169         frame.func->_parsed = true;
1170     } else {
1171         parsing_printf("[%s] ==== resuming parse of frame %lx ====\n",
1172                        FILE__,frame.func->addr());
1173         // Pull work that can be resumed off the delayedWork list
1174         std::map<ParseWorkElem *, Function *>::iterator iter;
1175
1176         vector<ParseWorkElem *> updated;
1177         vector<ParseWorkElem *>::iterator uIter;
1178
1179         for (iter = frame.delayedWork.begin();
1180              iter != frame.delayedWork.end();
1181              ++iter) {
1182             if (iter->second->retstatus() != UNSET) {
1183                 frame.pushWork(iter->first);
1184                 updated.push_back(iter->first);
1185             }
1186         }
1187         for (uIter = updated.begin(); uIter != updated.end(); ++uIter) {
1188             frame.delayedWork.erase(*uIter);
1189         }
1190     }
1191
1192
1193     frame.set_status(ParseFrame::PROGRESS);
1194
1195     while(!worklist.empty()) {
1196
1197         Block * cur = NULL;
1198         ParseWorkElem * work = frame.popWork();
1199         if (work->order() == ParseWorkElem::call) {
1200             Function * ct = NULL;
1201
1202             if (!work->callproc()) {
1203                 // If we're not doing recursive traversal, skip *all* of the call edge processing.
1204                 // We don't want to create a stub. We'll handle the assumption of a fallthrough
1205                 // target for the fallthrough work element, not the call work element. Catch blocks
1206                 // will get excluded but that's okay; our heuristic is not reliable enough that I'm willing to deploy
1207                 // it when we'd only be detecting a non-returning callee by name anyway.
1208                 // --BW 12/2012
1209                 if (!recursive) {
1210                     parsing_printf("[%s] non-recursive parse skipping call %lx->%lx\n",
1211                                    FILE__, work->edge()->src()->lastInsnAddr(), work->target());
1212                     continue;
1213                 }
1214
1215                 parsing_printf("[%s] binding call %lx->%lx\n",
1216                                FILE__,work->edge()->src()->lastInsnAddr(),work->target());
1217
1218                 pair<Function*,Edge*> ctp =
1219                         bind_call(frame,
1220                                   work->target(),
1221                                   work->edge()->src(),
1222                                   work->edge());
1223                 ct = ctp.first;
1224
1225                 work->mark_call();
1226             } else {
1227                 ct = _parse_data->findFunc(frame.codereg,work->target());
1228             }
1229
1230             if (recursive && ct &&
1231                 (frame_status(ct->region(),ct->addr())==ParseFrame::UNPARSED ||
1232                  frame_status(ct->region(),ct->addr())==ParseFrame::BAD_LOOKUP)) {
1233                 // suspend this frame and parse the next
1234                 parsing_printf("    [suspend frame %lx]\n", func->addr());
1235                 frame.call_target = ct;
1236                 frame.set_status(ParseFrame::CALL_BLOCKED);
1237                 // need to re-visit this edge
1238                 frame.pushWork(work);
1239                 if (ahPtr) delete ahPtr;
1240                 return;
1241             } else if (ct && work->tailcall()) {
1242                 // XXX The target has been or is currently being parsed (else
1243                 //     the previous conditional would have been taken),
1244                 //     so if its return status is unset then this
1245                 //     function has to take UNKNOWN
1246                 if (func->retstatus() != RETURN) {
1247                     if (ct->retstatus() > NORETURN)
1248                         func->set_retstatus(ct->retstatus());
1249                     else if (ct->retstatus() == UNSET)
1250                         func->set_retstatus(UNKNOWN);
1251                 }
1252             }
1253
1254             continue;
1255         } else if (work->order() == ParseWorkElem::call_fallthrough) {
1256             // check associated call edge's return status
1257             Edge * ce = bundle_call_edge(work->bundle());
1258             if (!ce) {
1259                 // odd; no call edge in this bundle
1260                 parsing_printf("[%s] unexpected missing call edge at %lx\n",
1261                                FILE__,work->edge()->src()->lastInsnAddr());
1262             } else {
1263                 // check for system call FT
1264                 Edge* edge = work->edge();
1265                 Block::Insns blockInsns;
1266 //                boost::lock_guard<Block> src_guard(*edge->src());
1267                 edge->src()->getInsns(blockInsns);
1268                 auto prev = blockInsns.rbegin();
1269                 InstructionAPI::Instruction prevInsn = prev->second;
1270                 bool is_nonret = false;
1271
1272                 if (prevInsn.getOperation().getID() == e_syscall) {
1273                     Address src = edge->src()->lastInsnAddr();
1274
1275
1276                     // Need to determine if system call is non-returning
1277                     long int syscallNumber;
1278                     if (!getSyscallNumber(func, edge->src(), src, frame.codereg->getArch(), syscallNumber)) {
1279                         // If we cannot retrieve a syscall number, assume the syscall returns
1280                         parsing_printf("[%s] could not retrieve syscall edge, assuming returns\n", FILE__);
1281                     } else {
1282                         if (obj().cs()->nonReturningSyscall(syscallNumber)) {
1283                             is_nonret = true;
1284                         }
1285                     }
1286
1287                     if (is_nonret) {
1288                         parsing_printf("[%s] no fallthrough for non-returning syscall\n",
1289                                        FILE__,
1290                                        work->edge()->src()->lastInsnAddr());
1291
1292                         // unlink tempsink fallthrough edge
1293                         Edge * remove = work->edge();
1294                         remove->src()->removeTarget(remove);
1295                         factory().destroy_edge(remove);
1296                         continue;
1297                     }
1298                 } else {
1299                     Address target = ce->trg()->start();
1300                     Function * ct = _parse_data->findFunc(frame.codereg,target);
1301                     bool is_plt = false;
1302
1303                     // check if associated call edge's return status is still unknown
1304                     if (ct && (ct->retstatus() == UNSET) ) {
1305                         // Delay parsing until we've finished the corresponding call edge
1306                         parsing_printf("[%s] Parsing FT edge %lx, corresponding callee (%s) return status unknown; delaying work\n",
1307                                        __FILE__,
1308                                        work->edge()->src()->lastInsnAddr(),
1309                                        ct->name().c_str());
1310
1311                         // Add work to ParseFrame's delayed list
1312                         frame.pushDelayedWork(work, ct);
1313
1314                         // Continue other work for this frame
1315                         continue;
1316                     }
1317
1318                     is_plt = HASHDEF(plt_entries,target);
1319
1320                     // CodeSource-defined tests 
1321                     is_nonret = obj().cs()->nonReturning(target);
1322                     if (is_nonret) {
1323                         parsing_printf("\t Disallowing FT edge: CodeSource reports nonreturning\n");
1324                     }
1325                     if (!is_nonret && is_plt) {
1326                         is_nonret |= obj().cs()->nonReturning(plt_entries[target]);
1327                         if (is_nonret) {
1328                             parsing_printf("\t Disallowing FT edge: CodeSource reports PLT nonreturning\n");
1329                         }
1330                     }
1331                     // Parsed return status tests
1332                     if (!is_nonret && !is_plt && ct) {
1333                         is_nonret |= (ct->retstatus() == NORETURN);
1334                         if (is_nonret) {
1335                             parsing_printf("\t Disallowing FT edge: function is non-returning\n");
1336                         }
1337                     }
1338                     // Call-stack tampering tests
1339                     if (unlikely(!is_nonret && frame.func->obj()->defensiveMode() && ct)) {
1340                         is_nonret |= (ct->retstatus() == UNKNOWN);
1341                         if (is_nonret) {
1342                             parsing_printf("\t Disallowing FT edge: function in "
1343                                                    "defensive binary may not return\n");
1344                             mal_printf("Disallowing FT edge: function %lx in "
1345                                                "defensive binary may not return\n", ct->addr());
1346                         } else {
1347                             StackTamper ct_tamper = ct->tampersStack();
1348                             is_nonret |= (TAMPER_NONZERO == ct_tamper);
1349                             is_nonret |= (TAMPER_ABS == ct_tamper);
1350                             if (is_nonret) {
1351                                 mal_printf("Disallowing FT edge: function at %lx "
1352                                                    "tampers with its stack\n", ct->addr());
1353                                 parsing_printf("\t Disallowing FT edge: function "
1354                                                        "tampers with its stack\n");
1355                             }
1356                         }
1357                     }
1358                     if (is_nonret) {
1359                         parsing_printf("[%s] no fallthrough for non-returning call "
1360                                                "to %lx at %lx\n",FILE__,target,
1361                                        work->edge()->src()->lastInsnAddr());
1362
1363                         // unlink tempsink fallthrough edge
1364                         Edge * remove = work->edge();
1365                         remove->src()->removeTarget(remove);
1366                         factory().destroy_edge(remove);
1367                         continue;
1368                     } else
1369                         // Invalidate cache_valid for all sharing functions
1370                         invalidateContainingFuncs(func, ce->src());
1371                 }
1372             }
1373         } else if (work->order() == ParseWorkElem::seed_addr) {
1374             cur = leadersToBlock[work->target()];
1375         } else if (work->order() == ParseWorkElem::resolve_jump_table) {
1376             // resume to resolve jump table
1377             auto work_ah = work->ah();
1378             parsing_printf("... continue parse indirect jump at %lx\n", work_ah->getAddr());
1379             Block *nextBlock = work->cur();
1380             if (nextBlock->last() != work_ah->getAddr()) {
1381                 // The block has been split
1382                 region_data * rd = _parse_data->findRegion(frame.codereg);
1383                 set<Block*> blocks;
1384                 rd->blocksByRange.find(work_ah->getAddr(), blocks);
1385                 for (auto bit = blocks.begin(); bit != blocks.end(); ++bit) {
1386                     if ((*bit)->last() == work_ah->getAddr()) {
1387                         nextBlock = (*bit);
1388                         break;
1389                     }
1390                 }
1391
1392             }
1393             ProcessCFInsn(frame,nextBlock,work->ah());
1394             continue;
1395         }
1396             // call fallthrough case where we have already checked that
1397             // the target returns. this is used in defensive mode.
1398         else if (work->order() == ParseWorkElem::checked_call_ft) {
1399             Edge* ce = bundle_call_edge(work->bundle());
1400             if (ce != NULL) {
1401                 invalidateContainingFuncs(func, ce->src());
1402             } else {
1403                 parsing_printf("[%s] unexpected missing call edge at %lx\n",
1404                                FILE__,work->edge()->src()->lastInsnAddr());
1405             }
1406         }
1407
1408         if (NULL == cur) {
1409             pair<Block*,Edge*> newedge =
1410                     add_edge(frame,
1411                              frame.func,
1412                              work->edge()->src(),
1413                              work->target(),
1414                              work->edge()->type(),
1415                              work->edge());
1416             cur = newedge.first;
1417         }
1418
1419         if (HASHDEF(visited,cur->start()))
1420         {
1421             parsing_printf("[%s] skipping locally parsed target at %lx\n",
1422                            FILE__,work->target());
1423             continue;
1424         }
1425         visited[cur->start()] = true;
1426         leadersToBlock[cur->start()] = cur;
1427
1428         if (!cur->_parsed)
1429         {
1430             parsing_printf("[%s] parsing block %lx\n",
1431                            FILE__,cur->start());
1432             if (frame.func->obj()->defensiveMode()) {
1433                 mal_printf("new block at %lx (0x%lx)\n",cur->start(), cur);
1434             }
1435
1436             cur->_parsed = true;
1437             curAddr = cur->start();
1438         } else {
1439             parsing_printf("[%s] deferring parse of shared block %lx\n",
1440                            FILE__,cur->start());
1441             if (func->retstatus() < UNKNOWN) {
1442                 // we've parsed into another function, if we've parsed
1443                 // into it's entry point, set retstatus to match it
1444                 Function * other_func = _parse_data->findFunc(
1445                         func->region(), cur->start());
1446                 if (other_func && other_func->retstatus() > UNKNOWN) {
1447                     func->set_retstatus(other_func->retstatus());
1448                 } else {
1449                     func->set_retstatus(UNKNOWN);
1450                 }
1451             }
1452             // The edge to this shared block is changed from
1453             // "going to sink" to going to this shared block.
1454             // This changes the function boundary, so we need to
1455             // invalidate the cache.
1456             func->_cache_valid = false;
1457             continue;
1458         }
1459
1460         /*
1461          * External consumers of parsing may have invoked finalizing
1462          * methods from callback routines. Ensure that the caches
1463          * are invalidated because a new block is being added to the
1464          * function's view
1465          */
1466         func->_cache_valid = false;
1467
1468         // NB Using block's region() here because it may differ from the
1469         //    function's if control flow has jumped to another region
1470         unsigned size =
1471                 cur->region()->offset() + cur->region()->length() - curAddr;
1472         const unsigned char* bufferBegin =
1473                 (const unsigned char *)(func->isrc()->getPtrToInstruction(curAddr));
1474         InstructionDecoder dec(bufferBegin,size,frame.codereg->getArch());
1475
1476         if (!ahPtr)
1477             ahPtr = InstructionAdapter_t::makePlatformIA_IAPI(func->obj()->cs()->getArch(), dec, curAddr, func->obj(),
1478                                                               cur->region(), func->isrc(), cur);
1479         else
1480             ahPtr->reset(dec,curAddr,func->obj(),
1481                          cur->region(), func->isrc(), cur);
1482
1483         InstructionAdapter_t * ah = ahPtr;
1484
1485         using boost::tuples::tie;
1486         tie(nextBlockAddr,nextBlock) = func->get_next_block(
1487                 frame.curAddr, frame.codereg);
1488
1489         bool isNopBlock = ah->isNop();
1490
1491         while(true) {
1492             curAddr = ah->getAddr();
1493             /** Check for straight-line fallthrough **/
1494             if (curAddr == nextBlockAddr) {
1495                 parsing_printf("[%s] straight-line parse into block at %lx\n",
1496                                FILE__,curAddr);
1497                 if (frame.func->obj()->defensiveMode()) {
1498                     mal_printf("straight-line parse into block at %lx\n",curAddr);
1499                 }
1500                 ah->retreat();
1501                 curAddr = ah->getAddr();
1502
1503                 end_block(cur,ahPtr);
1504                 pair<Block*,Edge*> newedge =
1505                         add_edge(frame,frame.func,cur,
1506                                  nextBlockAddr,FALLTHROUGH,NULL);
1507
1508                 if (!HASHDEF(visited,nextBlockAddr) &&
1509                     !HASHDEF(leadersToBlock,nextBlockAddr)) {
1510                     parsing_printf("[%s:%d] pushing %lx onto worklist\n",
1511                                    FILE__,__LINE__,nextBlockAddr);
1512
1513                     frame.pushWork(
1514                             frame.mkWork(
1515                                     NULL,
1516                                     newedge.second,
1517                                     nextBlockAddr,
1518                                     true,
1519                                     false)
1520                     );
1521                     /* preserved as example of leaky code; use mkWork instead
1522                     frame.pushWork(
1523                         new ParseWorkElem(
1524                             NULL,
1525                             newedge.second,
1526                             nextBlockAddr,
1527                             true,
1528                             false)
1529                         );
1530                     */
1531                     leadersToBlock[nextBlockAddr] = nextBlock;
1532                 }
1533                 break;
1534             } else if (curAddr > nextBlockAddr) {
1535                 parsing_printf("[%s:%d] inconsistent instruction stream: "
1536                                        "%lx is within [%lx,%lx)\n",
1537                                FILE__,__LINE__,curAddr,
1538                                nextBlock->start(),nextBlock->end());
1539
1540                 // NB "cur" hasn't ended, so its range may
1541                 // not look like it overlaps with nextBlock
1542                 _pcb.overlapping_blocks(cur,nextBlock);
1543
1544                 tie(nextBlockAddr,nextBlock) =
1545                         func->get_next_block(frame.curAddr, frame.codereg);
1546             }
1547
1548             // per-instruction callback notification
1549             ParseCallback::insn_details insn_det;
1550             insn_det.insn = ah;
1551
1552             parsing_printf("[%s:%d] curAddr 0x%lx: %s \n",
1553                            FILE__,__LINE__,curAddr, insn_det.insn->getInstruction().format().c_str() );
1554
1555             if (func->_is_leaf_function) {
1556                 Address ret_addr;
1557                 func->_is_leaf_function = !(insn_det.insn->isReturnAddrSave(ret_addr));
1558                 parsing_printf("[%s:%d] leaf %d funcname %s \n",
1559                                FILE__,__LINE__,func->_is_leaf_function, func->name().c_str());
1560                 if (!func->_is_leaf_function) func->_ret_addr = ret_addr;
1561             }
1562
1563             _pcb.instruction_cb(func,cur,curAddr,&insn_det);
1564
1565             if (isNopBlock && !ah->isNop()) {
1566                 ah->retreat();
1567
1568                 end_block(cur,ahPtr);
1569                 pair<Block*,Edge*> newedge =
1570                         add_edge(frame,frame.func,cur,curAddr,FALLTHROUGH,NULL);
1571                 Block * targ = newedge.first;
1572
1573                 parsing_printf("[%s:%d] nop-block ended at %lx\n",
1574                                FILE__,__LINE__,curAddr);
1575                 if (targ && !HASHDEF(visited,targ->start())) {
1576                     parsing_printf("[%s:%d] pushing %lx onto worklist\n",
1577                                    FILE__,__LINE__,targ->start());
1578
1579                     frame.pushWork(
1580                             frame.mkWork(
1581                                     NULL,
1582                                     newedge.second,
1583                                     targ->start(),
1584                                     true,
1585                                     false)
1586                     );
1587                     leadersToBlock[targ->start()] = targ;
1588                 }
1589                 break;
1590             }
1591
1592             /** Particular instruction handling (calls, branches, etc) **/
1593             ++num_insns;
1594
1595             if(ah->hasCFT()) {
1596                 if (ah->isIndirectJump()) {
1597                     // Create a work element to represent that
1598                     // we will resolve the jump table later
1599                     end_block(cur,ahPtr);
1600                     frame.pushWork( frame.mkWork( work->bundle(), cur, ahPtr));
1601                 } else {
1602                     ProcessCFInsn(frame,cur,ahPtr);
1603                 }
1604                 break;
1605             } else if (func->_saves_fp &&
1606                        func->_no_stack_frame &&
1607                        ah->isFrameSetupInsn()) { // isframeSetup is expensive
1608                 func->_no_stack_frame = false;
1609             } else if (ah->isLeave()) {
1610                 func->_no_stack_frame = false;
1611             } else if ( ah->isAbort() ) {
1612                 // 4. `abort-causing' instructions
1613                 end_block(cur,ahPtr);
1614                 //link(cur, sink_block, DIRECT, true);
1615                 break;
1616             } else if ( ah->isInvalidInsn() ) {
1617                 // 4. Invalid or `abort-causing' instructions
1618                 end_block(cur,ahPtr);
1619                 link(cur, Block::sink_block, DIRECT, true);
1620                 break;
1621             } else if ( ah->isInterruptOrSyscall() ) {
1622                 // 5. Raising instructions
1623                 end_block(cur,ahPtr);
1624
1625                 pair<Block*,Edge*> newedge =
1626                         add_edge(frame,frame.func,cur,ah->getNextAddr(),FALLTHROUGH,NULL);
1627                 Block * targ = newedge.first;
1628
1629                 if (targ && !HASHDEF(visited,targ->start()) &&
1630                     !HASHDEF(leadersToBlock,targ->start())) {
1631                     parsing_printf("[%s:%d] pushing %lx onto worklist\n",
1632                                    FILE__,__LINE__,targ->start());
1633
1634                     frame.pushWork(
1635                             frame.mkWork(
1636                                     NULL,
1637                                     newedge.second,
1638                                     targ->start(),
1639                                     true,
1640                                     false)
1641                     );
1642                     leadersToBlock[targ->start()] = targ;
1643                 }
1644                 if (unlikely(func->obj()->defensiveMode())) {
1645                     fprintf(stderr,"parsed bluepill insn sysenter or syscall "
1646                             "in defensive mode at %lx\n",curAddr);
1647                 }
1648                 break;
1649             } else if (unlikely(func->obj()->defensiveMode())) {
1650                 if (!_pcb.hasWeirdInsns(func) && ah->isGarbageInsn()) {
1651                     // add instrumentation at this addr so we can
1652                     // extend the function if this really executes
1653                     ParseCallback::default_details det(
1654                             (unsigned char*) cur->region()->getPtrToInstruction(cur->lastInsnAddr()),
1655                             cur->end() - cur->lastInsnAddr(),
1656                             true);
1657                     _pcb.abruptEnd_cf(cur->lastInsnAddr(),cur,&det);
1658                     _pcb.foundWeirdInsns(func);
1659                     end_block(cur,ahPtr);
1660                     // allow invalid instructions to end up as a sink node.
1661                     link(cur, Block::sink_block, DIRECT, true);
1662                     break;
1663                 } else if (ah->isNopJump()) {
1664                     // patch the jump to make it a nop, and re-set the 
1665                     // instruction adapter so we parse the instruction
1666                     // as a no-op this time, allowing the subsequent
1667                     // instruction to be parsed correctly
1668                     mal_printf("Nop jump at %lx, changing it to nop\n",ah->getAddr());
1669                     _pcb.patch_nop_jump(ah->getAddr());
1670                     unsigned bufsize =
1671                             func->region()->offset() + func->region()->length() - ah->getAddr();
1672                     func->region()->offset() + func->region()->length() - ahPtr->getAddr();
1673                     const unsigned char* bufferBegin = (const unsigned char *)
1674                             (func->isrc()->getPtrToInstruction(ah->getAddr()));
1675                     dec = InstructionDecoder
1676                             (bufferBegin, bufsize, frame.codereg->getArch());
1677                     ah->reset(dec, curAddr, func->obj(),
1678                               func->region(), func->isrc(), cur);
1679                 } else {
1680                     entryID id = ah->getInstruction().getOperation().getID();
1681                     switch (id) {
1682                         case e_rdtsc:
1683                             fprintf(stderr,"parsed bluepill insn rdtsc at %lx\n",curAddr);
1684                             break;
1685                         case e_sldt:
1686                             fprintf(stderr,"parsed bluepill insn sldt at %lx\n",curAddr);
1687                             break;
1688                         default:
1689                             break;
1690                     }
1691                 }
1692             } else {
1693                 // default
1694             }
1695
1696             /** Check for overruns of valid address space **/
1697             if (!is_code(func,ah->getNextAddr())) {
1698                 parsing_printf("[%s] next insn %lx is invalid\n",
1699                                FILE__,ah->getNextAddr());
1700
1701                 end_block(cur,ahPtr);
1702                 // We need to tag the block with a sink edge
1703                 link(cur, Block::sink_block, DIRECT, true);
1704                 break;
1705             } else if (!cur->region()->contains(ah->getNextAddr())) {
1706                 parsing_printf("[%s] next address %lx is outside [%lx,%lx)\n",
1707                                FILE__,ah->getNextAddr(),
1708                                cur->region()->offset(),
1709                                cur->region()->offset()+cur->region()->length());
1710                 end_block(cur,ahPtr);
1711                 // We need to tag the block with a sink edge
1712                 link(cur, Block::sink_block, DIRECT, true);
1713                 break;
1714             }
1715             ah->advance();
1716         }
1717     }
1718     if (ahPtr) delete ahPtr;
1719     // Check if parsing is complete
1720     if (!frame.delayedWork.empty()) {
1721         frame.set_status(ParseFrame::FRAME_DELAYED);
1722         return;
1723     }
1724
1725     /** parsing complete **/
1726     if (HASHDEF(plt_entries,frame.func->addr())) {
1727 //        if (obj().cs()->nonReturning(frame.func->addr())) {
1728         if (obj().cs()->nonReturning(plt_entries[frame.func->addr()])) {
1729             frame.func->set_retstatus(NORETURN);
1730         } else {
1731             frame.func->set_retstatus(UNKNOWN);
1732         }
1733
1734         // Convenience -- adopt PLT name
1735         frame.func->_name = plt_entries[frame.func->addr()];
1736     } else if (frame.func->retstatus() == UNSET) {
1737         frame.func->set_retstatus(NORETURN);
1738     }
1739
1740     frame.set_status(ParseFrame::PARSED);
1741
1742     if (unlikely(obj().defensiveMode())) {
1743         // calculate this after setting the function to PARSED, so that when
1744         // we finalize the function we'll actually save the results and won't
1745         // re-finalize it
1746         func->tampersStack();
1747     }
1748     _pcb.newfunction_retstatus( func );
1749 }
1750
1751 void
1752 Parser::end_block(Block * b, InstructionAdapter_t * ah)
1753 {
1754     b->_lastInsn = ah->getAddr();
1755     b->updateEnd(ah->getNextAddr());
1756
1757 //    record_block(b);
1758 }
1759
1760 void
1761 Parser::record_block(Block *b)
1762 {
1763     parsing_printf("[%s:%d] recording block [%lx,%lx)\n",
1764                    FILE__,__LINE__,b->start(),b->end());
1765     _parse_data->record_block(b->region(),b);
1766 }
1767
1768
1769 Block *
1770 Parser::block_at(
1771         Function * owner,
1772         Address addr,
1773         Block * & split)
1774 {
1775 //    ScopeLock<Mutex<true> > l(work_mutex);
1776
1777     Block * exist = NULL;
1778     Block * ret = NULL;
1779     Block * inconsistent = NULL;
1780     Address prev_insn = 0;
1781
1782     split = NULL;
1783     CodeRegion *cr;
1784     if(owner->region()->contains(addr))
1785         cr = owner->region();
1786     else
1787         cr = _parse_data->reglookup(owner->region(),addr);
1788
1789
1790     if(!is_code(owner,addr)) {
1791         parsing_printf("[%s] block address %lx rejected by isCode()\n",
1792                        FILE__,addr);
1793         return NULL;
1794     }
1795
1796
1797     {
1798 #ifdef JOHNMC_REMOVE_DEADLOCK
1799     boost::lock_guard<Function> g(*owner);
1800 #endif
1801     for(auto i = owner->blocks_begin();
1802         i != owner->blocks_end();
1803         ++i)
1804     {
1805         if((*i)->start() == addr) return *i;
1806         if((*i)->start() < addr && addr < (*i)->end()) {
1807             return split_block(owner, *i, addr, prev_insn);
1808         }
1809     }
1810     return _cfgfact._mkblock(owner, cr, addr);
1811     }
1812 }
1813
1814 pair<Block *,Edge *>
1815 Parser::add_edge(
1816         ParseFrame & frame,
1817         Function * owner,
1818         Block * src,
1819         Address dst,
1820         EdgeTypeEnum et,
1821         Edge * exist)
1822 {
1823     // adjust anything pointing to zero to force it to sink
1824     if(dst == 0) { dst = -1; }
1825     Block * split = NULL;
1826     Block * ret = NULL;
1827     Edge * newedge = NULL;
1828     pair<Block *, Edge *> retpair((Block *) NULL, (Edge *) NULL);
1829
1830     if(!is_code(owner,dst)) {
1831         parsing_printf("[%s] target address %lx rejected by isCode()\n",dst);
1832         return retpair;
1833     }
1834
1835     ret = block_at(owner,dst,split);
1836     retpair.first = ret;
1837
1838     if(split == src) {
1839         // special case -- same block
1840         src = ret;
1841     }
1842
1843     if(split && HASHDEF(frame.visited,split->start())) {
1844         // prevent "delayed parsing" of extant block if 
1845         // this frame has already visited it
1846         frame.visited[ret->start()] = true;
1847         frame.leadersToBlock[ret->start()] = ret;
1848     }
1849
1850     if(NULL == exist) {
1851         newedge = link(src,ret,et,false);
1852         retpair.second = newedge;
1853     } else {
1854         relink(exist,src,ret);
1855         retpair.second = exist;
1856     }
1857
1858     return retpair;
1859 }
1860
/* Split block b of `owner' at address addr.
 *
 * A new block `ret' covering [addr, b->end()) is created; all of b's
 * outgoing edges are moved to ret, b is truncated to end at addr, and
 * a FALLTHROUGH edge b -> ret is inserted. Any finalized function that
 * owns b has its cached CFG invalidated. Returns the new upper block.
 *
 * `previnsn' is the address of the last instruction before addr
 * (currently unused here; see the commented-out updateBlockEnd call).
 */
Block *
Parser::split_block(
        Function * owner,
        Block *b,
        Address addr,
        Address previnsn)
{
    Block * ret;
    CodeRegion * cr;
    bool isRetBlock = false;
    // Locate the region holding b; prefer owner's own region.
    if(owner->region()->contains(b->start()))
        cr = owner->region();
    else
        cr = _parse_data->reglookup(owner->region(),b->start());
    region_data * rd = _parse_data->findRegion(cr);

    // enable for extra-safe testing, but callers are responsible
    // assert(b->consistent(addr));

    ret = factory()._mkblock(owner,cr,addr);

    // move out edges

    {

        // Transfer every outgoing edge of b to the new upper block.
        Block::edgelist &trgs = b->_trglist;
        Block::edgelist::iterator tit = trgs.begin();
        for (; tit != trgs.end(); ++tit) {
            Edge *e = *tit;
            e->_source = ret;
            ret->_trglist.push_back(e);
        }
        // Remember whether b ended in a return before clearing its edges.
        if (!trgs.empty() && RET == (*trgs.begin())->type()) {
            isRetBlock = true;
        }
        trgs.clear();
        // The upper block inherits b's former end and last instruction.
        ret->updateEnd(b->end());
        ret->_lastInsn = b->_lastInsn;
        ret->_parsed = true;
        link(b,ret,FALLTHROUGH,false);

        record_block(ret);

        // b's range has changed
        //rd->updateBlockEnd(b, addr, previnsn);
        // Any functions holding b that have already been finalized
        // need to have their caches invalidated so that they will
        // find out that they have this new 'ret' block
        std::set<Function*> prev_owners;
        rd->findFuncs(b->start(),prev_owners);
        for(std::set<Function*>::iterator oit = prev_owners.begin();
            oit != prev_owners.end(); ++oit)
        {
            Function * po = *oit;
            if (po->_cache_valid) {
                po->_cache_valid = false;
                parsing_printf("[%s:%d] split of [%lx,%lx) invalidates cache of "
                                       "func at %lx\n",
                               FILE__,__LINE__,b->start(),b->end(),po->addr());
            }
            if (isRetBlock) {
                po->_retBL.clear(); //could remove b from the vector instead of clearing it, not sure what's cheaper
            }
        }
        // KEVINTODO: study performance impact of this callback
        _pcb.splitBlock(b,ret);
    }

    return ret;
}
1931
1932
1933 pair<Function *,Edge*>
1934 Parser::bind_call(ParseFrame & frame, Address target, Block * cur, Edge * exist)
1935 {
1936     Function * tfunc = NULL;
1937     Block * tblock = NULL;
1938     FuncSource how = RT;
1939     if(frame.func->_src == GAP || frame.func->_src == GAPRT)
1940         how = GAPRT;
1941
1942     // look it up
1943     tfunc = _parse_data->get_func(frame.codereg,target,how);
1944     if(!tfunc) {
1945         parsing_printf("[%s:%d] can't bind call to %lx\n",
1946                        FILE__,__LINE__,target);
1947         return pair<Function*,Edge*>((Function *) NULL,exist);
1948     }
1949     _pcb.discover_function(tfunc);
1950
1951     // add an edge
1952     pair<Block*,Edge*> ret = add_edge(frame,tfunc,cur,target,CALL,exist);
1953     tblock = ret.first;
1954     if(!tblock) {
1955         parsing_printf("[%s:%d] can't bind call to %lx\n",
1956                        FILE__,__LINE__,target);
1957         return pair<Function*,Edge*>((Function *) NULL,exist);
1958     }
1959
1960     return pair<Function*,Edge*>(tfunc,ret.second);
1961 }
1962
1963 Function *
1964 Parser::findFuncByEntry(CodeRegion *r, Address entry)
1965 {
1966     if(_parse_state < PARTIAL) {
1967         parsing_printf("[%s:%d] Parser::findFuncByEntry([%lx,%lx),%lx) "
1968                                "forced parsing\n",
1969                        FILE__,__LINE__,r->low(),r->high(),entry);
1970         parse();
1971     }
1972     return _parse_data->findFunc(r,entry);
1973 }
1974
1975 int
1976 Parser::findFuncs(CodeRegion *r, Address addr, set<Function *> & funcs)
1977 {
1978     if(_parse_state < COMPLETE) {
1979         parsing_printf("[%s:%d] Parser::findFuncs([%lx,%lx),%lx,...) "
1980                                "forced parsing\n",
1981                        FILE__,__LINE__,r->low(),r->high(),addr);
1982         parse();
1983     }
1984     if(_parse_state < FINALIZED) {
1985         parsing_printf("[%s:%d] Parser::findFuncs([%lx,%lx),%lx,...) "
1986                                "forced finalization\n",
1987                        FILE__,__LINE__,r->low(),r->high(),addr);
1988         finalize();
1989     }
1990     return _parse_data->findFuncs(r,addr,funcs);
1991 }
1992
1993 int
1994 Parser::findFuncs(CodeRegion *r, Address start, Address end, set<Function *> & funcs)
1995 {
1996     if(_parse_state < COMPLETE) {
1997         parsing_printf("[%s:%d] Parser::findFuncs([%lx,%lx),%lx,%lx) "
1998                                "forced parsing\n",
1999                        FILE__,__LINE__,r->low(),r->high(),start,end);
2000         parse();
2001     }
2002     if(_parse_state < FINALIZED) {
2003         parsing_printf("[%s:%d] Parser::findFuncs([%lx,%lx),%lx,%lx) "
2004                                "forced finalization\n",
2005                        FILE__,__LINE__,r->low(),r->high(),start,end);
2006         finalize();
2007     }
2008     return _parse_data->findFuncs(r,start,end,funcs);
2009 }
2010
2011 Block *
2012 Parser::findBlockByEntry(CodeRegion *r, Address entry)
2013 {
2014     if(_parse_state < PARTIAL) {
2015         parsing_printf("[%s:%d] Parser::findBlockByEntry([%lx,%lx),%lx) "
2016                                "forced parsing\n",
2017                        FILE__,__LINE__,r->low(),r->high(),entry);
2018         parse();
2019     }
2020     return _parse_data->findBlock(r,entry);
2021 }
2022
2023 Block *
2024 Parser::findNextBlock(CodeRegion *r, Address addr)
2025 {
2026     if(_parse_state < PARTIAL) {
2027         parsing_printf("[%s:%d] Parser::findBlockByEntry([%lx,%lx),%lx) "
2028                                "forced parsing\n",
2029                        FILE__,__LINE__,r->low(),r->high(),addr);
2030         parse();
2031     }
2032     return _parse_data->findRegion(r)->get_next_block(addr).second;
2033 }
2034
2035 int
2036 Parser::findBlocks(CodeRegion *r, Address addr, set<Block *> & blocks)
2037 {
2038     if(_parse_state < COMPLETE) {
2039         parsing_printf("[%s:%d] Parser::findBlocks([%lx,%lx),%lx,...) "
2040                                "forced parsing\n",
2041                        FILE__,__LINE__,r->low(),r->high(),addr);
2042         parse();
2043     }
2044     return _parse_data->findBlocks(r,addr,blocks);
2045 }
2046
2047 // find blocks without parsing.
2048 int Parser::findCurrentBlocks(CodeRegion* cr, Address addr,
2049                               std::set<Block*>& blocks) {
2050     return _parse_data->findBlocks(cr, addr, blocks);
2051 }
2052
2053 int Parser::findCurrentFuncs(CodeRegion * cr, Address addr, std::set<Function*> &funcs) {
2054     return _parse_data->findFuncs(cr, addr, funcs);
2055 }
2056
2057 Edge*
2058 Parser::link(Block *src, Block *dst, EdgeTypeEnum et, bool sink)
2059 {
2060     assert(et != NOEDGE);
2061     Edge * e = factory()._mkedge(src,dst,et);
2062     e->_type._sink = sink;
2063     src->addTarget(e);
2064     dst->addSource(e);
2065     _pcb.addEdge(src, e, ParseCallback::target);
2066     _pcb.addEdge(dst, e, ParseCallback::source);
2067     return e;
2068 }
2069
2070 /* 
2071  * During parsing, all edges are temporarily linked to the _tempsink
2072  * block. Adding and then removing edges to and from this block is
2073  * wasteful, especially given that removal is an O(N) operation with
2074  * vector storage. This call thus does not record edges into the sink.
2075  * These edges are guaranteed to have been relinked when parsing is
2076  * in state COMPLETE.
2077  *
2078  * NB This introduces an inconsistency into the CFG invariant that all
2079  *    targets of an edge have that edge in their source list if the
2080  *    data structures are queried during parsing. Extenders of 
2081  *    parsing callbacks are the only ones who can see this state;
2082  *    they have been warned and should know better.
2083  */
2084 Edge*
2085 Parser::link_tempsink(Block *src, EdgeTypeEnum et)
2086 {
2087     Edge * e = factory()._mkedge(src,Block::sink_block,et);
2088     e->_type._sink = true;
2089     src->_trglist.push_back(e);
2090     return e;
2091 }
2092
/* Re-point edge e so that it runs from src to dst, updating the
 * endpoint blocks' edge lists and notifying parse callbacks.
 * Handles the special case of promoting a temporary sink edge
 * (which was never reported to callbacks on either endpoint)
 * to a real edge. */
void
Parser::relink(Edge * e, Block *src, Block *dst)
{
    assert(e);
    // True only if e was a temporary sink edge whose endpoints were
    // never reported to the callbacks (see link_tempsink); in that
    // case both source and target registrations must be added below.
    bool addSrcAndDest = true;
    if(src != e->src()) {
        assert(e);
        e->src()->removeTarget(e);
        _pcb.removeEdge(e->src(), e, ParseCallback::target);
        assert(e);
        e->_source = src;
        src->addTarget(e);
        _pcb.addEdge(src, e, ParseCallback::target);
        // Source side is now registered; no catch-up needed.
        addSrcAndDest = false;
    }
    if(dst != e->trg()) {
        assert(e);
        if(e->trg() != Block::sink_block) {
            // Real (non-sink) previous target: detach and notify.
            assert(e);
            if(e->trg()) {
                e->trg()->removeSource(e);
            }
            _pcb.removeEdge(e->trg(), e, ParseCallback::source);
            addSrcAndDest = false;
        }
        dst->addSource(e);
        _pcb.addEdge(dst, e, ParseCallback::source);
        if (addSrcAndDest) {
            // We're re-linking a sinkEdge to be a non-sink edge; since 
            // we don't inform PatchAPI of temporary sinkEdges, we have
            // to add both the source AND target edges
            _pcb.addEdge(src, e, ParseCallback::target);
        }
    }

    // Edge remains a sink edge only if it still targets the sink block.
    e->_type._sink = (dst == Block::sink_block);
}
2130
2131 ParseFrame::Status
2132 Parser::frame_status(CodeRegion * cr, Address addr)
2133 {
2134     // XXX parsing frames may have been cleaned up, but status
2135     //     is always cached
2136     return _parse_data->frameStatus(cr,addr);
2137 }
2138
2139 void
2140 Parser::remove_func(Function *func)
2141 {
2142     if (sorted_funcs.end() != sorted_funcs.find(func)) {
2143         sorted_funcs.erase(func);
2144     }
2145     if (HINT == func->src()) {
2146         for (unsigned fidx=0; fidx < hint_funcs.size(); fidx++) {
2147             if (hint_funcs[fidx] == func) {
2148                 hint_funcs[fidx] = hint_funcs[hint_funcs.size()-1];
2149                 hint_funcs.pop_back();
2150                 break;
2151             }
2152         }
2153     }
2154     else {
2155         for (unsigned fidx=0; fidx < discover_funcs.size(); fidx++) {
2156             if (discover_funcs[fidx] == func) {
2157                 discover_funcs[fidx] = discover_funcs[discover_funcs.size()-1];
2158                 discover_funcs.pop_back();
2159                 break;
2160             }
2161         }
2162     }
2163
2164     _parse_data->remove_func(func);
2165 }
2166
2167 void
2168 Parser::remove_block(Dyninst::ParseAPI::Block *block)
2169 {
2170     boost::lock_guard<ParseData> g(*_parse_data, boost::adopt_lock);
2171     _parse_data->remove_block(block);
2172 }
2173
/* Re-key func in the per-region function maps: erase it from its old
 * region's funcsByAddr and insert it at (new_entry) in new_reg's map.
 *
 * NOTE(review): the fake-lock release below uses `reg_data', which by
 * then points at the NEW region's map, while the acquire used the OLD
 * region's map — presumably intentional since these are only race-
 * detector annotations, but verify. Also, the tbb accessor `a' is
 * reused for insert after erase; tbb releases the accessor's element
 * lock on erase, so this appears safe — TODO confirm against the tbb
 * concurrent_hash_map contract. */
void Parser::move_func(Function *func, Address new_entry, CodeRegion *new_reg)
{
    region_data *reg_data = _parse_data->findRegion(func->region());

    race_detector_fake_lock_acquire(race_detector_fake_lock(reg_data->funcsByAddr));
    {
      // Remove the old entry-point mapping, if present.
      tbb::concurrent_hash_map<Address, Function*>::accessor a;
      if(reg_data->funcsByAddr.find(a, func->addr()))
        {
          reg_data->funcsByAddr.erase(a);
        }
      // Insert under the new entry address in the destination region.
      reg_data = _parse_data->findRegion(new_reg);
      reg_data->funcsByAddr.insert(a, make_pair(new_entry, func));
    }
    race_detector_fake_lock_release(race_detector_fake_lock(reg_data->funcsByAddr));
}
2190
2191 void Parser::invalidateContainingFuncs(Function *owner, Block *b)
2192 {
2193     CodeRegion * cr;
2194     if(owner->region()->contains(b->start()))
2195         cr = owner->region();
2196     else
2197         cr = _parse_data->reglookup(owner->region(),b->start());
2198     region_data * rd = _parse_data->findRegion(cr);
2199
2200
2201     // Any functions holding b that have already been finalized
2202     // need to have their caches invalidated so that they will
2203     // find out that they have this new 'ret' block
2204     std::set<Function*> prev_owners;
2205     rd->findFuncs(b->start(),prev_owners);
2206     for(std::set<Function*>::iterator oit = prev_owners.begin();
2207         oit != prev_owners.end(); ++oit)
2208     {
2209         Function * po = *oit;
2210         po->_cache_valid = false;
2211         parsing_printf("[%s:%d] split of [%lx,%lx) invalidates cache of "
2212                                "func at %lx\n",
2213                        FILE__,__LINE__,b->start(),b->end(),po->addr());
2214     }
2215 }
2216
2217 /* Add ParseFrames waiting on func back to the work queue */
2218 void Parser::resumeFrames(Function * func, vector<ParseFrame *> & work)
2219 {
2220     // If we do not know the function's return status, don't put its waiters back on the worklist
2221     if (func->retstatus() == UNSET) {
2222         parsing_printf("[%s] %s return status unknown, cannot resume waiters\n",
2223                        __FILE__,
2224                        func->name().c_str());
2225         return;
2226     }
2227     boost::lock_guard<DelayedFrames> g(delayed_frames);
2228
2229     // When a function's return status is set, all waiting frames back into the worklist
2230     map<Function *, set<ParseFrame *> >::iterator iter = delayed_frames.frames.find(func);
2231     if (iter == delayed_frames.frames.end()) {
2232         // There were no frames waiting, ignore
2233         parsing_printf("[%s] %s return status %d, no waiters\n",
2234                        __FILE__,
2235                        func->name().c_str(),
2236                        func->retstatus());
2237         return;
2238     } else {
2239         parsing_printf("[%s] %s return status %d, undelaying waiting functions\n",
2240                        __FILE__,
2241                        func->name().c_str(),
2242                        func->retstatus());
2243         // Add each waiting frame back to the worklist
2244         set<ParseFrame *> vec = iter->second;
2245         for (set<ParseFrame *>::iterator fIter = vec.begin();
2246              fIter != vec.end();
2247              ++fIter) {
2248             work.push_back(*fIter);
2249         }
2250         // remove func from delayedFrames map
2251         delayed_frames.frames.erase(func);
2252     }
2253 }
2254
2255 bool Parser::getSyscallNumber(Function * /*func*/,
2256                               Block * block,
2257                               Address /*addr*/,
2258                               Architecture arch,
2259                               long int & val)
2260 {
2261     val = -1;
2262
2263     // In the common case, the setup of system call number
2264     // is a mov in the previous instruction. We don't currently look elsewhere.
2265     // In the future, could use slicing and symeval to find the value of
2266     // this register at the system call (as unstrip does).
2267     Block::Insns blockInsns;
2268     block->getInsns(blockInsns);
2269     auto prevPair = ++(blockInsns.rbegin());
2270     InstructionAPI::Instruction prevInsn = prevPair->second;
2271     if (prevInsn.getOperation().getID() != e_mov) {
2272         return false;
2273     }
2274
2275     MachRegister syscallNumberReg = MachRegister::getSyscallNumberReg(arch);
2276     if (syscallNumberReg == InvalidReg) {
2277         return false;
2278     }
2279     InstructionAPI::RegisterAST* regAST = new InstructionAPI::RegisterAST(syscallNumberReg);
2280     InstructionAPI::RegisterAST::Ptr regASTPtr = InstructionAPI::RegisterAST::Ptr(regAST);
2281
2282     std::vector<InstructionAPI::Operand> operands;
2283     prevInsn.getOperands(operands);
2284     for (unsigned i = 0; i < operands.size(); i++) {
2285         if (!operands[i].isWritten(regASTPtr)) {
2286             InstructionAPI::Expression::Ptr value = operands[i].getValue();
2287             InstructionAPI::Result res = value->eval();
2288             if (res.defined) {
2289                 val = res.convert<Offset>();
2290             }
2291         }
2292     }
2293
2294     return (val != -1);
2295 }
2296