Fixes for non-returning functions and tail calls:
[dyninst.git] / parseAPI / src / ParserDetails.C
1 /*
2  * See the dyninst/COPYRIGHT file for copyright information.
3  * 
4  * We provide the Paradyn Tools (below described as "Paradyn")
5  * on an AS IS basis, and do not warrant its validity or performance.
6  * We reserve the right to update, modify, or discontinue this
7  * software at any time.  We shall have no obligation to supply such
8  * updates or modifications or any other form of support to you.
9  * 
10  * By your use of Paradyn, you understand and agree that we (or any
11  * other person or entity with proprietary rights in Paradyn) are
12  * under no obligation to provide either maintenance services,
13  * update services, notices of latent defects, or correction of
14  * defects for Paradyn.
15  * 
16  * This library is free software; you can redistribute it and/or
17  * modify it under the terms of the GNU Lesser General Public
18  * License as published by the Free Software Foundation; either
19  * version 2.1 of the License, or (at your option) any later version.
20  * 
21  * This library is distributed in the hope that it will be useful,
22  * but WITHOUT ANY WARRANTY; without even the implied warranty of
23  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
24  * Lesser General Public License for more details.
25  * 
26  * You should have received a copy of the GNU Lesser General Public
27  * License along with this library; if not, write to the Free Software
28  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
29  */
30 #include "CodeObject.h"
31 #include "CFG.h"
32 #include "ParseCallback.h"
33
34 #include "Parser.h"
35 #include "ParserDetails.h"
36 #include "ParseData.h"
37 #include "debug_parse.h"
38 #include "util.h"
39
40 using namespace std;
41 using namespace Dyninst;
42 using namespace Dyninst::ParseAPI;
43
44 typedef std::pair< Address, EdgeTypeEnum > edge_pair_t;
45 typedef vector< edge_pair_t > Edges_t;
46
47 #define VERBOSE_EDGE_LOG
48
49 namespace {
50
51 inline void
52 #ifndef VERBOSE_EDGE_LOG                        
53 verbose_log(Address /* currAddr */, Edges_t::iterator & /* curEdge */)
54 {
55 #else
56 verbose_log(Address currAddr, Edges_t::iterator & curEdge)
57 {
58   using namespace Dyninst::ParseAPI;
59   
60     switch(curEdge->second)
61     {
62         case CALL:
63             parsing_printf("%s[%d]: adding call edge %lx->%lx\n",
64                            FILE__, __LINE__, currAddr, curEdge->first);
65             break;
66         case CALL_FT:
67             parsing_printf("%s[%d]: adding function fallthrough edge %lx->%lx\n",
68                            FILE__, __LINE__, currAddr, curEdge->first);
69             break;
70         case FALLTHROUGH:
71             parsing_printf("%s[%d]: adding fallthrough edge %lx->%lx\n",
72                            FILE__, __LINE__, currAddr, curEdge->first);
73             break;
74         case COND_TAKEN:
75             parsing_printf("%s[%d]: adding conditional taken edge %lx->%lx\n",
76                            FILE__, __LINE__, currAddr, curEdge->first);
77             break;
78         case COND_NOT_TAKEN:
79             parsing_printf("%s[%d]: adding conditional not taken edge %lx->%lx\n",
80                            FILE__, __LINE__, currAddr, curEdge->first);
81             break;
82         case INDIRECT:
83             parsing_printf("%s[%d]: adding indirect edge %lx->%lx\n",
84                            FILE__, __LINE__, currAddr, curEdge->first);
85             break;
86         case DIRECT:
87             parsing_printf("%s[%d]: adding direct edge %lx->%lx\n",
88                            FILE__, __LINE__, currAddr, curEdge->first);
89             break;
90         case CATCH:
91             parsing_printf("%s[%d]: adding catch block edge %lx->%lx\n",
92                            FILE__, __LINE__, currAddr, curEdge->first);
93             break;
94         default:
95             parsing_printf("%s[%d]: adding unknown edge type %d edge %lx->%lx\n",
96                            FILE__, __LINE__, curEdge->second, currAddr, curEdge->first);
97             break;
98     }
99 #endif // VERBOSE_EDGE_LOG
100 }
101 } // anonymous namespace
102
103 static void 
104 getBlockInsns(Block &blk, std::set<Address> &addrs)
105 {
106     unsigned bufSize = blk.size();
107     using namespace InstructionAPI;
108     const unsigned char* bufferBegin = (const unsigned char *)
109         (blk.obj()->cs()->getPtrToInstruction(blk.start()));
110     InstructionDecoder dec = InstructionDecoder
111         (bufferBegin, bufSize, blk.region()->getArch());
112     InstructionAdapter_t ah(dec, blk.start(), blk.obj(), blk.region(), blk.obj()->cs(), &blk);
113
114         for (; ah.getAddr() < blk.end(); ah.advance()) {
115         addrs.insert(ah.getAddr());
116     } 
117 }
118
119 /* called in defensive mode to create parseFrames at tampered addresses 
120    for functions that return TAMPER_ABS. */
121 ParseFrame * 
122 Parser::getTamperAbsFrame(Function *tamperFunc)
123 {
124     assert(TAMPER_ABS == tamperFunc->tampersStack());
125     Function * targFunc = NULL;
126
127     // get the binary's load address and subtract it
128
129     CodeSource *cs = tamperFunc->obj()->cs();
130     set<CodeRegion*> targRegs;
131
132     Address loadAddr = cs->loadAddress();
133     Address target = tamperFunc->_tamper_addr - loadAddr;
134     if (loadAddr < tamperFunc->_tamper_addr) {
135         cs->findRegions(target, targRegs);
136     }
137     if (targRegs.empty()) {
138         mal_printf("ERROR: could not create function at tamperAbs "
139                    "addr, which is in another object %lx\n",target);
140         return NULL;
141     }
142         
143     assert(1 == targRegs.size()); // we don't do analyze stack tampering on 
144                                   // platforms that use archive files
145     targFunc = _parse_data->get_func
146         (*(targRegs.begin()), 
147          target, 
148          tamperFunc->src());
149
150     if (!targFunc) {
151         targFunc = _parse_data->get_func(*(targRegs.begin()),target,RT);
152     }
153
154     if(!targFunc) {
155         mal_printf("ERROR: could not create function at tamperAbs addr %lx\n",
156                    tamperFunc->_tamper_addr);
157         return NULL;
158     }
159
160     ParseFrame * pf = NULL;
161     CodeRegion *reg = targFunc->region();
162
163     ParseFrame::Status exist = _parse_data->frameStatus(reg, target);
164     switch(exist) {
165     case ParseFrame::FRAME_ERROR:
166     case ParseFrame::PROGRESS:
167         fprintf(stderr,"ERROR: function frame at %lx in bad state, can't "
168                 "add edge; status=%d\n",target, exist);
169         return NULL;
170         break;
171     case ParseFrame::PARSED:
172         fprintf(stderr,"ERROR: function frame at %lx already parsed, can't "
173                 "add edge; status=%d\n",target, exist);
174         return NULL;
175         break;
176     case ParseFrame::BAD_LOOKUP:
177         // create new frame
178         pf = _parse_data->findFrame(reg, target);
179         assert( !pf );
180         pf = new ParseFrame(targFunc,_parse_data);
181         break;
182     case ParseFrame::UNPARSED:
183     case ParseFrame::CALL_BLOCKED:
184         pf = _parse_data->findFrame(reg, target);
185         if ( !pf ) {
186             fprintf(stderr,"ERROR: no function frame at %lx for frame "
187                     "that should exist, can't add edge; status=%d\n",
188                     target, exist);
189             return NULL;
190         }
191         break;
192     default:
193         assert(0);
194     }
195         
196     // make a temp edge
197     Function::const_blocklist ret_blks = tamperFunc->returnBlocks();
198     for (auto bit = ret_blks.begin(); 
199          bit != ret_blks.end(); 
200          ++bit)
201     {
202         Edge *edge = link_tempsink(*bit, CALL);
203
204         // create new bundle since we're not adding CALL,CALL_FT edge pairs
205         /* FIXME flag for Drew or Kevin:
206              this code (see original in comment below) does not add
207             the new work element to the worklist; I preserved this
208             behavior while changing code to fit the new, 
209             leak-free idiom, but I don't know whether this is intended
210             or a bug. You might want to wrap the call in a frame.pushWork.
211          */
212         (void)pf->mkWork(NULL,edge,target,true,true);
213         /*
214         ParseWorkBundle *bundle = new ParseWorkBundle();
215         pf->work_bundles.push_back(bundle);
216         bundle->add(
217             new ParseWorkElem(
218                 bundle,
219                 edge,
220                 target,
221                 true,
222                 true)
223           );
224         */
225     }
226
227     return pf;
228 }
229
230 // Param pf tampers with its stack by a relative or absolute amount. 
231 // In the first case, adjust CALL_FT target edge if there is one
232 // In the second case, add a new ParseFrame to the worklist or 
233 // trigger parsing in the target object. 
234 void
235 Parser::tamper_post_processing(vector<ParseFrame *> & work, ParseFrame *pf)
236 {
237     // tampers with stack by relative amount: 
238     // adjust CALL_FT target edge if there is one
239     for (unsigned widx = 0; 
240          pf->func->tampersStack() == TAMPER_REL && 
241          widx < work.size(); 
242          widx++) 
243     {
244         if (work[widx]->status() == ParseFrame::CALL_BLOCKED &&
245             pf->func == work[widx]->call_target) 
246         {
247             for (unsigned bidx=0 ; 
248                  bidx < work[widx]->work_bundles.size(); 
249                  bidx++) 
250             {
251                 const vector<ParseWorkElem*> &elems = 
252                     work[widx]->work_bundles[bidx]->elems();
253                 bool rightBundle = false;
254                 ParseWorkElem * ftEdge = NULL;
255                 for (unsigned eix=0; eix < elems.size(); eix++)
256                 {
257                     if (NULL == elems[eix]->edge()) 
258                     {
259                         continue;
260                     }
261                     if (elems[eix]->edge()->type() == CALL &&
262                         elems[eix]->target()==pf->func->addr())
263                     {
264                         rightBundle = true;
265                     }
266                     else if (elems[eix]->edge()->type() == CALL_FT)
267                     {
268                         ftEdge = elems[eix];
269                     }
270                 }
271                 if (rightBundle && ftEdge) 
272                 {
273                     ftEdge->setTarget(ftEdge->target() + 
274                                       pf->func->_tamper_addr);
275                 }
276             }
277         }
278     }
279     // create frame for TAMPER_ABS target in this object or parse
280     // in target object 
281     if (pf->func->tampersStack() == TAMPER_ABS) 
282     {
283         Address objLoad = 0;
284         CodeObject *targObj = NULL;
285         if (_pcb.absAddr(pf->func->_tamper_addr, 
286                          objLoad, 
287                          targObj)) 
288         {
289             if (targObj == &_obj) { // target is in this object, add frame
290                 ParseFrame * tf = getTamperAbsFrame(pf->func);
291                 if (tf && ! _parse_data->findFrame(tf->func->region(),
292                                                    tf->func->addr()) ) 
293                 {
294                     init_frame(*tf);
295                     frames.push_back(tf);
296                     _parse_data->record_frame(tf);
297                     _pcb.updateCodeBytes(pf->func->_tamper_addr - objLoad);
298                 }
299                 if (tf) {
300                     mal_printf("adding TAMPER_ABS target %lx frame\n", 
301                                pf->func->_tamper_addr);
302                     work.push_back(tf);
303                 }
304             }
305             else { // target is in another object, parse there
306                 mal_printf("adding TAMPER_ABS target %lx "
307                            "in separate object at %lx\n", 
308                            pf->func->_tamper_addr, objLoad);
309                 _obj.parse(pf->func->_tamper_addr - objLoad, true);
310             }
311         }
312         else {
313             mal_printf("discarding invalid TAMPER_ABS target %lx\n", 
314                        pf->func->_tamper_addr);
315         }
316     }
317 }
318
319
320 /*
321  * Extra handling for bad jump instructions
322  */
323 inline
324 void Parser::ProcessUnresBranchEdge(
325     ParseFrame & frame,
326     Block * cur,
327     InstructionAdapter_t & ah,
328     Address target)
329 {
330     ParseCallback::interproc_details det;
331     det.ibuf = (unsigned char*)
332        frame.func->isrc()->getPtrToInstruction(ah.getAddr());
333     det.isize = ah.getSize();
334     if (((Address)-1) == target) {
335         det.data.unres.target = 0;
336     } else {
337         det.data.unres.target = target;
338     }
339     det.type = ParseCallback::interproc_details::unresolved;
340     det.data.unres.target = target;
341
342     bool valid; Address addr;
343     boost::tie(valid, addr) = ah.getCFT();
344     if (!valid) {
345         det.data.unres.dynamic = true;
346         det.data.unres.absolute_address = true;
347     } else {
348         det.data.unres.dynamic = false;
349         det.data.unres.absolute_address = false;
350     }
351     _pcb.interproc_cf(frame.func,cur,ah.getAddr(),&det);
352 }
353
354 /*
355  * Extra handling of return instructions
356  */
357 inline
358 void Parser::ProcessReturnInsn(
359     ParseFrame & frame,
360     Block * cur,
361     InstructionAdapter_t & ah)
362 {
363     // returns always target the sink block
364     link(cur,_sink,RET,true);
365
366     ParseCallback::interproc_details det;
367     det.ibuf = (unsigned char*)
368         frame.func->isrc()->getPtrToInstruction(ah.getAddr());
369     det.isize = ah.getSize();
370     det.type = ParseCallback::interproc_details::ret;
371
372     _pcb.interproc_cf(frame.func, cur, ah.getAddr(),&det);
373 }
374
375
376 /*
377  * Extra handling for literal call instructions
378  * as well as other instructions treated as calls
379  */
380 inline
381 void Parser::ProcessCallInsn(
382     ParseFrame & frame,
383     Block * cur,
384     InstructionAdapter_t & ah,
385     bool isDynamic,
386     bool isAbsolute,
387     bool isResolved,
388     Address target)
389 {
390     ParseCallback::interproc_details det;
391     det.ibuf = (unsigned char*)
392        frame.func->isrc()->getPtrToInstruction(ah.getAddr());
393     det.isize = ah.getSize();
394     
395     if(ah.isCall()) {
396         det.data.call.absolute_address = isAbsolute;
397         det.data.call.dynamic_call = isDynamic;
398         det.data.call.target = target;
399         if (likely(isResolved))
400             det.type = ParseCallback::interproc_details::call; 
401         else
402             det.type = ParseCallback::interproc_details::unresolved; 
403     }
404     else
405         det.type = ParseCallback::interproc_details::branch_interproc;
406     
407     _pcb.interproc_cf(frame.func,cur,ah.getAddr(),&det);
408 }
409
410 void Parser::ProcessCFInsn(
411     ParseFrame & frame,
412     Block * cur,
413     InstructionAdapter_t & ah)
414 {
415     FuncReturnStatus insn_ret;
416     Edges_t edges_out;
417     ParseWorkBundle * bundle = NULL;
418
419
420     // terminate the block at this address
421     end_block(cur,ah);
422     
423     // Instruction adapter provides edge estimates from an instruction
424     parsing_printf("Getting edges\n");
425     ah.getNewEdges(edges_out, frame.func, cur, frame.num_insns, &plt_entries, frame.knownTargets); 
426     parsing_printf("Returned %d edges\n", edges_out.size());
427     if (unlikely(_obj.defensiveMode() && !ah.isCall() && edges_out.size())) {
428         // only parse branch edges that align with existing blocks
429         bool hasUnalignedEdge = false;
430         set<CodeRegion*> tregs;
431         set<Block*> tblocks;
432         set<Address> insns_cur;
433         for(Edges_t::iterator curEdge = edges_out.begin();
434             !hasUnalignedEdge && curEdge != edges_out.end(); 
435             ++curEdge)
436         {
437             if (cur->end() <= curEdge->first) {
438               parsing_printf("%s[%d] skipping edge\n", FILE__, __LINE__);
439                 continue;
440             }
441             _obj.cs()->findRegions(curEdge->first,tregs);
442             if (!tregs.empty()) {
443                 _parse_data->findBlocks(*tregs.begin(),curEdge->first, tblocks);
444                 for (set<Block*>::iterator bit= tblocks.begin(); 
445                      !hasUnalignedEdge && bit != tblocks.end(); 
446                      bit++) 
447                 {
448                     if ((*bit)->end() <= cur->start() ||
449                         (*bit) == cur) 
450                     {
451                       parsing_printf("%s[%d] skipping edge\n", FILE__, __LINE__);
452                         continue;
453                     }
454                     set<Address> insns_tblk;
455                     getBlockInsns(**bit,insns_tblk);
456                     if (insns_cur.empty()) {
457                         getBlockInsns(*cur,insns_cur);
458                     }
459                     if ((*bit)->start() < cur->start()) {
460                         if (insns_tblk.end() == insns_tblk.find(cur->start())) {
461                             hasUnalignedEdge = true;
462                         } 
463                     } else if (insns_cur.end() == insns_cur.find((*bit)->start())) {
464                         hasUnalignedEdge = true;
465                     }
466                     if (hasUnalignedEdge) {
467                         mal_printf("Found unaligned blocks [%lx %lx) [%lx %lx), adding abruptEnd "
468                                    "point and killing out edges\n", cur->start(), cur->end(), 
469                                    (*bit)->start(), (*bit)->end());
470                     }
471                 }
472             }
473         }
474         if (true == hasUnalignedEdge) {
475           parsing_printf("Has unaligned edge, clearing and treating as abrupt end\n");
476             edges_out.clear();
477             ParseCallback::default_details det(
478                 (unsigned char*) cur->region()->getPtrToInstruction(cur->lastInsnAddr()),
479                 cur->end() - cur->lastInsnAddr(),
480                 true);
481             _pcb.abruptEnd_cf(cur->lastInsnAddr(),cur,&det);
482         }
483     }
484
485     insn_ret = ah.getReturnStatus(frame.func,frame.num_insns); 
486
487     // Update function return status if possible
488     if(unlikely(insn_ret != UNSET && frame.func->_rs < RETURN))
489         frame.func->set_retstatus(insn_ret);
490
491     // Return instructions need extra processing
492     if(insn_ret == RETURN)
493        ProcessReturnInsn(frame,cur,ah);
494
495     bool dynamic_call = ah.isDynamicCall();
496     bool absolute_call = ah.isAbsoluteCall();
497     // unresolved is true for indirect calls, unresolved indirect branches, 
498     // and later on is set set to true for transfers to bad addresses
499     bool has_unres = ah.hasUnresolvedControlFlow(frame.func,frame.num_insns);
500
501     parsing_printf("\t\t%d edges:\n",edges_out.size());
502     for(Edges_t::iterator curEdge = edges_out.begin();
503         curEdge != edges_out.end(); ++curEdge)
504     {
505         Edge * newedge = NULL;
506         bool resolvable_edge = true;
507         bool tailcall = false;
508
509         if(!is_code(frame.func,curEdge->first) &&
510            !HASHDEF(plt_entries,curEdge->first))
511         {
512             if(curEdge->second != CALL || !dynamic_call) {
513                 has_unres = true;
514                 resolvable_edge = false;
515                 if ((int)curEdge->second != -1 && _obj.defensiveMode()) 
516                     mal_printf("bad edge target at %lx type %d\n",
517                                curEdge->first, curEdge->second);
518             }
519         }
520
521         /*
522          * Call case 
523          */ 
524         if(curEdge->second == CALL)
525         {
526             // call callback
527             resolvable_edge = resolvable_edge && !dynamic_call;
528             ProcessCallInsn(frame,cur,ah,dynamic_call,
529                 absolute_call,resolvable_edge,curEdge->first);
530
531             if(resolvable_edge) {
532                  newedge = link_tempsink(cur,CALL);
533             }
534             else { 
535                 newedge = link(cur,_sink,CALL,true);
536             }
537             if(!ah.isCall()) {
538                parsing_printf("Setting edge 0x%lx (0x%lx/0x%lx) to interproc\n",
539                               newedge,
540                               newedge->src()->start(), 
541                               newedge->trg()->start());
542                 newedge->_type._interproc = true;
543             }
544         }
545         /*
546          * All other edge types are handled identically
547          */
548         else
549         {
550             if(resolvable_edge) {
551                 newedge = link_tempsink(cur,curEdge->second);
552             }
553             else
554                 newedge = link(cur,_sink,curEdge->second,true);
555         }
556
557         if (ah.isTailCall(frame.func, curEdge->second, frame.num_insns, frame.knownTargets)) {
558             tailcall = true; 
559             parsing_printf("Setting edge 0x%lx (0x%lx/0x%lx) to interproc (tail call)\n",
560                           newedge,
561                           newedge->src()->start(),
562                           newedge->trg()->start());
563             newedge->_type._interproc = true;
564         }
565
566         if(!bundle) {
567             bundle = new ParseWorkBundle();
568             frame.work_bundles.push_back(bundle);
569         }
570
571         verbose_log(ah.getAddr(),curEdge);
572         parsing_printf("resolveable_edge: %d, tailcall: %d, target: %lx\n", resolvable_edge, tailcall, curEdge->first);
573         ParseWorkElem * we = 
574           bundle->add(
575             new ParseWorkElem(
576                 bundle,
577                 newedge,
578                 curEdge->first,
579                 resolvable_edge,
580                 tailcall)
581           );
582         frame.knownTargets.insert(curEdge->first);
583
584         // We will not attempt to further process
585         // unresolvable edges; they stay sinks
586         if(resolvable_edge) {
587             parsing_printf("[%s:%d] pushing %lx onto worklist\n",
588                 FILE__,__LINE__,we->target());
589             frame.pushWork(we);
590
591             if (unlikely(_obj.defensiveMode())) {
592                 // update the underlying code bytes for CF targets
593                 if (  CALL == curEdge->second
594                       || DIRECT == curEdge->second
595                       || COND_TAKEN == curEdge->second)
596                 {
597
598                     _pcb.updateCodeBytes(curEdge->first);
599                 }
600             }
601         } 
602         else if( unlikely(_obj.defensiveMode()) )
603         {   
604             ProcessUnresBranchEdge(frame, cur, ah, curEdge->first);
605         }
606     }
607
608     if (unlikely(has_unres && edges_out.empty())) {
609         link(cur, _sink, INDIRECT, true);
610         ProcessUnresBranchEdge(frame, cur, ah, -1);
611          }
612
613     if(ah.isDelaySlot())
614         ah.advance();
615
616         if(!frame.func->_cleans_stack && ah.cleansStack()) {
617         frame.func->_cleans_stack = true;
618         }
619 }