Merge branch 'dyninst_proccontrol_bw'
[dyninst.git] / parseAPI / src / ParserDetails.C
1 /*
2  * Copyright (c) 1996-2011 Barton P. Miller
3  * 
4  * We provide the Paradyn Parallel Performance Tools (below
5  * described as "Paradyn") on an AS IS basis, and do not warrant its
6  * validity or performance.  We reserve the right to update, modify,
7  * or discontinue this software at any time.  We shall have no
8  * obligation to supply such updates or modifications or any other
9  * form of support to you.
10  * 
11  * By your use of Paradyn, you understand and agree that we (or any
12  * other person or entity with proprietary rights in Paradyn) are
13  * under no obligation to provide either maintenance services,
14  * update services, notices of latent defects, or correction of
15  * defects for Paradyn.
16  * 
17  * This library is free software; you can redistribute it and/or
18  * modify it under the terms of the GNU Lesser General Public
19  * License as published by the Free Software Foundation; either
20  * version 2.1 of the License, or (at your option) any later version.
21  * 
22  * This library is distributed in the hope that it will be useful,
23  * but WITHOUT ANY WARRANTY; without even the implied warranty of
24  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
25  * Lesser General Public License for more details.
26  * 
27  * You should have received a copy of the GNU Lesser General Public
28  * License along with this library; if not, write to the Free Software
29  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
30  */
31 #include "CodeObject.h"
32 #include "CFG.h"
33 #include "ParseCallback.h"
34
35 #include "Parser.h"
36 #include "ParserDetails.h"
37 #include "ParseData.h"
38 #include "debug_parse.h"
39 #include "util.h"
40
41 using namespace std;
42 using namespace Dyninst;
43 using namespace Dyninst::ParseAPI;
44
45 typedef std::pair< Address, EdgeTypeEnum > edge_pair_t;
46 typedef vector< edge_pair_t > Edges_t;
47
48 #define VERBOSE_EDGE_LOG
49
50 namespace {
51
52 inline void
53 #ifndef VERBOSE_EDGE_LOG                        
54 verbose_log(Address /* currAddr */, Edges_t::iterator & /* curEdge */)
55 {
56 #else
57 verbose_log(Address currAddr, Edges_t::iterator & curEdge)
58 {
59   using namespace Dyninst::ParseAPI;
60   
61     switch(curEdge->second)
62     {
63         case CALL:
64             parsing_printf("%s[%d]: adding call edge %x->%x\n",
65                            FILE__, __LINE__, currAddr, curEdge->first);
66             break;
67         case CALL_FT:
68             parsing_printf("%s[%d]: adding function fallthrough edge %x->%x\n",
69                            FILE__, __LINE__, currAddr, curEdge->first);
70             break;
71         case FALLTHROUGH:
72             parsing_printf("%s[%d]: adding fallthrough edge %x->%x\n",
73                            FILE__, __LINE__, currAddr, curEdge->first);
74             break;
75         case COND_TAKEN:
76             parsing_printf("%s[%d]: adding conditional taken edge %x->%x\n",
77                            FILE__, __LINE__, currAddr, curEdge->first);
78             break;
79         case COND_NOT_TAKEN:
80             parsing_printf("%s[%d]: adding conditional not taken edge %x->%x\n",
81                            FILE__, __LINE__, currAddr, curEdge->first);
82             break;
83         case INDIRECT:
84             parsing_printf("%s[%d]: adding indirect edge %x->%x\n",
85                            FILE__, __LINE__, currAddr, curEdge->first);
86             break;
87         case DIRECT:
88             parsing_printf("%s[%d]: adding direct edge %x->%x\n",
89                            FILE__, __LINE__, currAddr, curEdge->first);
90             break;
91         case CATCH:
92             parsing_printf("%s[%d]: adding catch block edge %x->%x\n",
93                            FILE__, __LINE__, currAddr, curEdge->first);
94             break;
95         default:
96             parsing_printf("%s[%d]: adding unknown edge type %d edge %x->%x\n",
97                            FILE__, __LINE__, curEdge->second, currAddr, curEdge->first);
98             break;
99     }
100 #endif // VERBOSE_EDGE_LOG
101 }
102 } // anonymous namespace
103
104
105 static void 
106 getBlockInsns(Block &blk, std::set<Address> &addrs)
107 {
108     unsigned bufSize = blk.size();
109 #if defined(cap_instruction_api)
110     using namespace InstructionAPI;
111     const unsigned char* bufferBegin = (const unsigned char *)
112         (blk.obj()->cs()->getPtrToInstruction(blk.start()));
113     InstructionDecoder dec = InstructionDecoder
114         (bufferBegin, bufSize, blk.region()->getArch());
115     InstructionAdapter_t ah(dec, blk.start(), blk.obj(), blk.region(), blk.obj()->cs(), &blk);
116 #else        
117     InstrucIter iter(blk.start(), bufSize, blk.obj()->cs());
118     InstructionAdapter_t ah(iter, 
119         blk.obj(), blk.region(), obj()->cs());
120 #endif
121
122         for (; ah.getAddr() < blk.end(); ah.advance()) {
123         addrs.insert(ah.getAddr());
124     } 
125 }
126
127 /* called in defensive mode to create parseFrames at tampered addresses 
128    for functions that return TAMPER_ABS. */
129 ParseFrame * 
130 Parser::getTamperAbsFrame(Function *tamperFunc)
131 {
132     assert(TAMPER_ABS == tamperFunc->tampersStack());
133     Function * targFunc = NULL;
134
135     // get the binary's load address and subtract it
136
137     CodeSource *cs = tamperFunc->obj()->cs();
138     set<CodeRegion*> targRegs;
139
140     Address loadAddr = cs->loadAddress();
141     Address target = tamperFunc->_tamper_addr - loadAddr;
142     if (loadAddr < tamperFunc->_tamper_addr) {
143         cs->findRegions(target, targRegs);
144     }
145     if (targRegs.empty()) {
146         mal_printf("ERROR: could not create function at tamperAbs "
147                    "addr, which is in another object %lx\n",target);
148         return NULL;
149     }
150         
151     assert(1 == targRegs.size()); // we don't do analyze stack tampering on 
152                                   // platforms that use archive files
153     targFunc = _parse_data->get_func
154         (*(targRegs.begin()), 
155          target, 
156          tamperFunc->src());
157
158     if (!targFunc) {
159         targFunc = _parse_data->get_func(*(targRegs.begin()),target,RT);
160     }
161
162     if(!targFunc) {
163         mal_printf("ERROR: could not create function at tamperAbs addr %lx\n",
164                    tamperFunc->_tamper_addr);
165         return NULL;
166     }
167
168     ParseFrame * pf = NULL;
169     CodeRegion *reg = targFunc->region();
170
171     ParseFrame::Status exist = _parse_data->frameStatus(reg, target);
172     switch(exist) {
173     case ParseFrame::FRAME_ERROR:
174     case ParseFrame::PROGRESS:
175         fprintf(stderr,"ERROR: function frame at %lx in bad state, can't "
176                 "add edge; status=%d\n",target, exist);
177         return NULL;
178         break;
179     case ParseFrame::PARSED:
180         fprintf(stderr,"ERROR: function frame at %lx already parsed, can't "
181                 "add edge; status=%d\n",target, exist);
182         return NULL;
183         break;
184     case ParseFrame::BAD_LOOKUP:
185         // create new frame
186         pf = _parse_data->findFrame(reg, target);
187         assert( !pf );
188         pf = new ParseFrame(targFunc,_parse_data);
189         break;
190     case ParseFrame::UNPARSED:
191     case ParseFrame::CALL_BLOCKED:
192         pf = _parse_data->findFrame(reg, target);
193         if ( !pf ) {
194             fprintf(stderr,"ERROR: no function frame at %lx for frame "
195                     "that should exist, can't add edge; status=%d\n",
196                     target, exist);
197             return NULL;
198         }
199         break;
200     default:
201         assert(0);
202     }
203         
204     // make a temp edge
205     Function::blocklist & ret_blks = tamperFunc->returnBlocks();
206     for (Function::blocklist::iterator bit = ret_blks.begin(); 
207          bit != ret_blks.end(); 
208          bit++)
209     {
210         Edge *edge = link_tempsink(*bit, CALL);
211
212         // create new bundle since we're not adding CALL,CALL_FT edge pairs
213         /* FIXME flag for Drew or Kevin:
214              this code (see original in comment below) does not add
215             the new work element to the worklist; I preserved this
216             behavior while changing code to fit the new, 
217             leak-free idiom, but I don't know whether this is intended
218             or a bug. You might want to wrap the call in a frame.pushWork.
219          */
220         (void)pf->mkWork(NULL,edge,target,true,true);
221         /*
222         ParseWorkBundle *bundle = new ParseWorkBundle();
223         pf->work_bundles.push_back(bundle);
224         bundle->add(
225             new ParseWorkElem(
226                 bundle,
227                 edge,
228                 target,
229                 true,
230                 true)
231           );
232         */
233     }
234
235     return pf;
236 }
237
238 // Param pf tampers with its stack by a relative or absolute amount. 
239 // In the first case, adjust CALL_FT target edge if there is one
240 // In the second case, add a new ParseFrame to the worklist or 
241 // trigger parsing in the target object. 
242 void
243 Parser::tamper_post_processing(vector<ParseFrame *> & work, ParseFrame *pf)
244 {
245     // tampers with stack by relative amount: 
246     // adjust CALL_FT target edge if there is one
247     for (unsigned widx = 0; 
248          pf->func->tampersStack() == TAMPER_REL && 
249          widx < work.size(); 
250          widx++) 
251     {
252         if (work[widx]->status() == ParseFrame::CALL_BLOCKED &&
253             pf->func == work[widx]->call_target) 
254         {
255             for (unsigned bidx=0 ; 
256                  bidx < work[widx]->work_bundles.size(); 
257                  bidx++) 
258             {
259                 const vector<ParseWorkElem*> &elems = 
260                     work[widx]->work_bundles[bidx]->elems();
261                 bool rightBundle = false;
262                 ParseWorkElem * ftEdge = NULL;
263                 for (unsigned eix=0; eix < elems.size(); eix++)
264                 {
265                     if (NULL == elems[eix]->edge()) 
266                     {
267                         continue;
268                     }
269                     if (elems[eix]->edge()->type() == CALL &&
270                         elems[eix]->target()==pf->func->addr())
271                     {
272                         rightBundle = true;
273                     }
274                     else if (elems[eix]->edge()->type() == CALL_FT)
275                     {
276                         ftEdge = elems[eix];
277                     }
278                 }
279                 if (rightBundle && ftEdge) 
280                 {
281                     ftEdge->setTarget(ftEdge->target() + 
282                                       pf->func->_tamper_addr);
283                 }
284             }
285         }
286     }
287     // create frame for TAMPER_ABS target in this object or parse
288     // in target object 
289     if (pf->func->tampersStack() == TAMPER_ABS) 
290     {
291         Address objLoad = 0;
292         CodeObject *targObj = NULL;
293         if (_pcb.absAddr(pf->func->_tamper_addr, 
294                          objLoad, 
295                          targObj)) 
296         {
297             if (targObj == &_obj) { // target is in this object, add frame
298                 ParseFrame * tf = getTamperAbsFrame(pf->func);
299                 if (tf && ! _parse_data->findFrame(tf->func->region(),
300                                                    tf->func->addr()) ) 
301                 {
302                     init_frame(*tf);
303                     frames.push_back(tf);
304                     _parse_data->record_frame(tf);
305                     _pcb.updateCodeBytes(pf->func->_tamper_addr - objLoad);
306                 }
307                 if (tf) {
308                     mal_printf("adding TAMPER_ABS target %lx frame\n", 
309                                pf->func->_tamper_addr);
310                     work.push_back(tf);
311                 }
312             }
313             else { // target is in another object, parse there
314                 mal_printf("adding TAMPER_ABS target %lx "
315                            "in separate object at %lx\n", 
316                            pf->func->_tamper_addr, objLoad);
317                 _obj.parse(pf->func->_tamper_addr - objLoad, true);
318             }
319         }
320         else {
321             mal_printf("discarding invalid TAMPER_ABS target %lx\n", 
322                        pf->func->_tamper_addr);
323         }
324     }
325 }
326
327
328 /*
329  * Extra handling for bad jump instructions
330  */
331 inline
332 void Parser::ProcessUnresBranchEdge(
333     ParseFrame & frame,
334     Block * cur,
335     InstructionAdapter_t & ah,
336     Address target)
337 {
338     ParseCallback::interproc_details det;
339     det.ibuf = (unsigned char*)
340        frame.func->isrc()->getPtrToInstruction(ah.getAddr());
341     det.isize = ah.getSize();
342     if (((Address)-1) == target) {
343         det.data.unres.target = 0;
344     } else {
345         det.data.unres.target = target;
346     }
347     det.type = ParseCallback::interproc_details::unresolved;
348     det.data.unres.target = target;
349
350     bool valid; Address addr;
351     boost::tie(valid, addr) = ah.getCFT();
352     if (!valid) {
353         det.data.unres.dynamic = true;
354         det.data.unres.absolute_address = true;
355     } else {
356         det.data.unres.dynamic = false;
357         det.data.unres.absolute_address = false;
358     }
359     _pcb.interproc_cf(frame.func,cur,ah.getAddr(),&det);
360 }
361
362 /*
363  * Extra handling of return instructions
364  */
365 inline
366 void Parser::ProcessReturnInsn(
367     ParseFrame & frame,
368     Block * cur,
369     InstructionAdapter_t & ah)
370 {
371     // returns always target the sink block
372     link(cur,_sink,RET,true);
373
374     ParseCallback::interproc_details det;
375     det.ibuf = (unsigned char*)
376         frame.func->isrc()->getPtrToInstruction(ah.getAddr());
377     det.isize = ah.getSize();
378     det.type = ParseCallback::interproc_details::ret;
379
380     _pcb.interproc_cf(frame.func, cur, ah.getAddr(),&det);
381 }
382
383
384 /*
385  * Extra handling for literal call instructions
386  * as well as other instructions treated as calls
387  */
388 inline
389 void Parser::ProcessCallInsn(
390     ParseFrame & frame,
391     Block * cur,
392     InstructionAdapter_t & ah,
393     bool isDynamic,
394     bool isAbsolute,
395     bool isResolved,
396     Address target)
397 {
398     ParseCallback::interproc_details det;
399     det.ibuf = (unsigned char*)
400        frame.func->isrc()->getPtrToInstruction(ah.getAddr());
401     det.isize = ah.getSize();
402     
403     if(ah.isCall()) {
404         det.data.call.absolute_address = isAbsolute;
405         det.data.call.dynamic_call = isDynamic;
406         det.data.call.target = target;
407         if (likely(isResolved))
408             det.type = ParseCallback::interproc_details::call; 
409         else
410             det.type = ParseCallback::interproc_details::unresolved; 
411     }
412     else
413         det.type = ParseCallback::interproc_details::branch_interproc;
414     
415     _pcb.interproc_cf(frame.func,cur,ah.getAddr(),&det);
416 }
417
418 void Parser::ProcessCFInsn(
419     ParseFrame & frame,
420     Block * cur,
421     InstructionAdapter_t & ah)
422 {
423     FuncReturnStatus insn_ret;
424     Edges_t edges_out;
425     ParseWorkBundle * bundle = NULL;
426
427
428     // terminate the block at this address
429     end_block(cur,ah);
430     
431     // Instruction adapter provides edge estimates from an instruction
432     ah.getNewEdges(edges_out, frame.func, cur, frame.num_insns, &plt_entries); 
433
434     if (unlikely(_obj.defensiveMode() && !ah.isCall() && edges_out.size())) {
435         // only parse branch edges that align with existing blocks
436         bool hasUnalignedEdge = false;
437         set<CodeRegion*> tregs;
438         set<Block*> tblocks;
439         set<Address> insns_cur;
440         for(Edges_t::iterator curEdge = edges_out.begin();
441             !hasUnalignedEdge && curEdge != edges_out.end(); 
442             ++curEdge)
443         {
444             if (cur->end() <= curEdge->first) {
445                 continue;
446             }
447             _obj.cs()->findRegions(curEdge->first,tregs);
448             if (!tregs.empty()) {
449                 _parse_data->findBlocks(*tregs.begin(),curEdge->first, tblocks);
450                 for (set<Block*>::iterator bit= tblocks.begin(); 
451                      !hasUnalignedEdge && bit != tblocks.end(); 
452                      bit++) 
453                 {
454                     if ((*bit)->end() <= cur->start() ||
455                         (*bit) == cur) 
456                     {
457                         continue;
458                     }
459                     set<Address> insns_tblk;
460                     getBlockInsns(**bit,insns_tblk);
461                     if (insns_cur.empty()) {
462                         getBlockInsns(*cur,insns_cur);
463                     }
464                     if ((*bit)->start() < cur->start()) {
465                         if (insns_tblk.end() == insns_tblk.find(cur->start())) {
466                             hasUnalignedEdge = true;
467                         } 
468                     } else if (insns_cur.end() == insns_cur.find((*bit)->start())) {
469                         hasUnalignedEdge = true;
470                     }
471                     if (hasUnalignedEdge) {
472                         mal_printf("Found unaligned blocks [%lx %lx) [%lx %lx), adding abruptEnd "
473                                    "point and killing out edges\n", cur->start(), cur->end(), 
474                                    (*bit)->start(), (*bit)->end());
475                     }
476                 }
477             }
478         }
479         if (true == hasUnalignedEdge) {
480             edges_out.clear();
481             ParseCallback::default_details det(
482                 (unsigned char*) cur->region()->getPtrToInstruction(cur->lastInsnAddr()),
483                 cur->end() - cur->lastInsnAddr(),
484                 true);
485             _pcb.abruptEnd_cf(cur->lastInsnAddr(),cur,&det);
486         }
487     }
488
489     insn_ret = ah.getReturnStatus(frame.func,frame.num_insns); 
490
491     // Update function return status if possible
492     if(unlikely(insn_ret != UNSET && frame.func->_rs < RETURN))
493         frame.func->set_retstatus(insn_ret);
494
495     // Return instructions need extra processing
496     if(insn_ret == RETURN)
497        ProcessReturnInsn(frame,cur,ah);
498
499     bool dynamic_call = ah.isDynamicCall();
500     bool absolute_call = ah.isAbsoluteCall();
501     // unresolved is true for indirect calls, unresolved indirect branches, 
502     // and later on is set set to true for transfers to bad addresses
503     bool has_unres = ah.hasUnresolvedControlFlow(frame.func,frame.num_insns);
504
505     parsing_printf("\t\t%d edges:\n",edges_out.size());
506     for(Edges_t::iterator curEdge = edges_out.begin();
507         curEdge != edges_out.end(); ++curEdge)
508     {
509         Edge * newedge = NULL;
510         bool resolvable_edge = true;
511         bool tailcall = false;
512
513         if(!is_code(frame.func,curEdge->first) &&
514            !HASHDEF(plt_entries,curEdge->first))
515         {
516             if(curEdge->second != NOEDGE || !dynamic_call) {
517                 has_unres = true;
518                 resolvable_edge = false;
519                 if (curEdge->second != -1 && _obj.defensiveMode()) 
520                     mal_printf("bad edge target at %lx type %d\n",
521                                curEdge->first, curEdge->second);
522             }
523         }
524
525         /*
526          * Call case 
527          */ 
528         if(curEdge->second == NOEDGE)
529         {
530             // call callback
531             resolvable_edge = resolvable_edge && !dynamic_call;
532             ProcessCallInsn(frame,cur,ah,dynamic_call,
533                 absolute_call,resolvable_edge,curEdge->first);
534
535             tailcall = !dynamic_call && 
536                 ah.isTailCall(frame.func,frame.num_insns);
537             if(resolvable_edge)
538                 newedge = link_tempsink(cur,CALL);
539             else {
540                 newedge = link(cur,_sink,CALL,true);
541             }
542             if(!ah.isCall())
543                 newedge->_type._interproc = true;
544         }
545         /*
546          * All other edge types are handled identically
547          */
548         else
549         {
550             if(resolvable_edge) {
551                 newedge = link_tempsink(cur,curEdge->second);
552             }
553             else
554                 newedge = link(cur,_sink,curEdge->second,true);
555         }
556
557         if(!bundle) {
558             bundle = new ParseWorkBundle();
559             frame.work_bundles.push_back(bundle);
560         }
561
562         verbose_log(ah.getAddr(),curEdge);
563
564         ParseWorkElem * we = 
565           bundle->add(
566             new ParseWorkElem(
567                 bundle,
568                 newedge,
569                 curEdge->first,
570                 resolvable_edge,
571                 tailcall)
572           );
573
574         // We will not attempt to further process
575         // unresolvable edges; they stay sinks
576         if(resolvable_edge) {
577             parsing_printf("[%s:%d] pushing %lx onto worklist\n",
578                 FILE__,__LINE__,we->target());
579             frame.pushWork(we);
580
581             if (unlikely(_obj.defensiveMode())) {
582                 // update the underlying code bytes for CF targets
583                 if (  CALL == curEdge->second
584                       || DIRECT == curEdge->second
585                       || COND_TAKEN == curEdge->second
586                                           || NOEDGE == curEdge->second)
587                 {
588
589                     _pcb.updateCodeBytes(curEdge->first);
590                 }
591             }
592         } 
593         else if( unlikely(_obj.defensiveMode() && NOEDGE != curEdge->second) )
594         {   
595             ProcessUnresBranchEdge(frame, cur, ah, curEdge->first);
596         }
597     }
598
599     if (unlikely(has_unres && edges_out.empty())) {
600         link(cur, _sink, INDIRECT, true);
601         ProcessUnresBranchEdge(frame, cur, ah, -1);
602          }
603
604     if(ah.isDelaySlot())
605         ah.advance();
606
607         if(!frame.func->_cleans_stack && ah.cleansStack()) {
608         frame.func->_cleans_stack = true;
609         }
610 }