Fix our tailcall parsing; we've observed conditional tailcalls in the wild and weren...
[dyninst.git] / parseAPI / src / ParserDetails.C
1 /*
2  * See the dyninst/COPYRIGHT file for copyright information.
3  * 
4  * We provide the Paradyn Tools (below described as "Paradyn")
5  * on an AS IS basis, and do not warrant its validity or performance.
6  * We reserve the right to update, modify, or discontinue this
7  * software at any time.  We shall have no obligation to supply such
8  * updates or modifications or any other form of support to you.
9  * 
10  * By your use of Paradyn, you understand and agree that we (or any
11  * other person or entity with proprietary rights in Paradyn) are
12  * under no obligation to provide either maintenance services,
13  * update services, notices of latent defects, or correction of
14  * defects for Paradyn.
15  * 
16  * This library is free software; you can redistribute it and/or
17  * modify it under the terms of the GNU Lesser General Public
18  * License as published by the Free Software Foundation; either
19  * version 2.1 of the License, or (at your option) any later version.
20  * 
21  * This library is distributed in the hope that it will be useful,
22  * but WITHOUT ANY WARRANTY; without even the implied warranty of
23  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
24  * Lesser General Public License for more details.
25  * 
26  * You should have received a copy of the GNU Lesser General Public
27  * License along with this library; if not, write to the Free Software
28  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
29  */
30 #include "CodeObject.h"
31 #include "CFG.h"
32 #include "ParseCallback.h"
33
34 #include "Parser.h"
35 #include "ParserDetails.h"
36 #include "ParseData.h"
37 #include "debug_parse.h"
38 #include "util.h"
39
40 using namespace std;
41 using namespace Dyninst;
42 using namespace Dyninst::ParseAPI;
43
44 typedef std::pair< Address, EdgeTypeEnum > edge_pair_t;
45 typedef vector< edge_pair_t > Edges_t;
46
47 #define VERBOSE_EDGE_LOG
48
49 namespace {
50
51 inline void
52 #ifndef VERBOSE_EDGE_LOG                        
53 verbose_log(Address /* currAddr */, Edges_t::iterator & /* curEdge */)
54 {
55 #else
56 verbose_log(Address currAddr, Edges_t::iterator & curEdge)
57 {
58   using namespace Dyninst::ParseAPI;
59   
60     switch(curEdge->second)
61     {
62         case CALL:
63             parsing_printf("%s[%d]: adding call edge %x->%x\n",
64                            FILE__, __LINE__, currAddr, curEdge->first);
65             break;
66         case CALL_FT:
67             parsing_printf("%s[%d]: adding function fallthrough edge %x->%x\n",
68                            FILE__, __LINE__, currAddr, curEdge->first);
69             break;
70         case FALLTHROUGH:
71             parsing_printf("%s[%d]: adding fallthrough edge %x->%x\n",
72                            FILE__, __LINE__, currAddr, curEdge->first);
73             break;
74         case COND_TAKEN:
75             parsing_printf("%s[%d]: adding conditional taken edge %x->%x\n",
76                            FILE__, __LINE__, currAddr, curEdge->first);
77             break;
78         case COND_NOT_TAKEN:
79             parsing_printf("%s[%d]: adding conditional not taken edge %x->%x\n",
80                            FILE__, __LINE__, currAddr, curEdge->first);
81             break;
82         case INDIRECT:
83             parsing_printf("%s[%d]: adding indirect edge %x->%x\n",
84                            FILE__, __LINE__, currAddr, curEdge->first);
85             break;
86         case DIRECT:
87             parsing_printf("%s[%d]: adding direct edge %x->%x\n",
88                            FILE__, __LINE__, currAddr, curEdge->first);
89             break;
90         case CATCH:
91             parsing_printf("%s[%d]: adding catch block edge %x->%x\n",
92                            FILE__, __LINE__, currAddr, curEdge->first);
93             break;
94         default:
95             parsing_printf("%s[%d]: adding unknown edge type %d edge %x->%x\n",
96                            FILE__, __LINE__, curEdge->second, currAddr, curEdge->first);
97             break;
98     }
99 #endif // VERBOSE_EDGE_LOG
100 }
101 } // anonymous namespace
102
103
104 static void 
105 getBlockInsns(Block &blk, std::set<Address> &addrs)
106 {
107     unsigned bufSize = blk.size();
108 #if defined(cap_instruction_api)
109     using namespace InstructionAPI;
110     const unsigned char* bufferBegin = (const unsigned char *)
111         (blk.obj()->cs()->getPtrToInstruction(blk.start()));
112     InstructionDecoder dec = InstructionDecoder
113         (bufferBegin, bufSize, blk.region()->getArch());
114     InstructionAdapter_t ah(dec, blk.start(), blk.obj(), blk.region(), blk.obj()->cs(), &blk);
115 #else        
116     InstrucIter iter(blk.start(), bufSize, blk.obj()->cs());
117     InstructionAdapter_t ah(iter, 
118         blk.obj(), blk.region(), obj()->cs());
119 #endif
120
121         for (; ah.getAddr() < blk.end(); ah.advance()) {
122         addrs.insert(ah.getAddr());
123     } 
124 }
125
126 /* called in defensive mode to create parseFrames at tampered addresses 
127    for functions that return TAMPER_ABS. */
128 ParseFrame * 
129 Parser::getTamperAbsFrame(Function *tamperFunc)
130 {
131     assert(TAMPER_ABS == tamperFunc->tampersStack());
132     Function * targFunc = NULL;
133
134     // get the binary's load address and subtract it
135
136     CodeSource *cs = tamperFunc->obj()->cs();
137     set<CodeRegion*> targRegs;
138
139     Address loadAddr = cs->loadAddress();
140     Address target = tamperFunc->_tamper_addr - loadAddr;
141     if (loadAddr < tamperFunc->_tamper_addr) {
142         cs->findRegions(target, targRegs);
143     }
144     if (targRegs.empty()) {
145         mal_printf("ERROR: could not create function at tamperAbs "
146                    "addr, which is in another object %lx\n",target);
147         return NULL;
148     }
149         
150     assert(1 == targRegs.size()); // we don't do analyze stack tampering on 
151                                   // platforms that use archive files
152     targFunc = _parse_data->get_func
153         (*(targRegs.begin()), 
154          target, 
155          tamperFunc->src());
156
157     if (!targFunc) {
158         targFunc = _parse_data->get_func(*(targRegs.begin()),target,RT);
159     }
160
161     if(!targFunc) {
162         mal_printf("ERROR: could not create function at tamperAbs addr %lx\n",
163                    tamperFunc->_tamper_addr);
164         return NULL;
165     }
166
167     ParseFrame * pf = NULL;
168     CodeRegion *reg = targFunc->region();
169
170     ParseFrame::Status exist = _parse_data->frameStatus(reg, target);
171     switch(exist) {
172     case ParseFrame::FRAME_ERROR:
173     case ParseFrame::PROGRESS:
174         fprintf(stderr,"ERROR: function frame at %lx in bad state, can't "
175                 "add edge; status=%d\n",target, exist);
176         return NULL;
177         break;
178     case ParseFrame::PARSED:
179         fprintf(stderr,"ERROR: function frame at %lx already parsed, can't "
180                 "add edge; status=%d\n",target, exist);
181         return NULL;
182         break;
183     case ParseFrame::BAD_LOOKUP:
184         // create new frame
185         pf = _parse_data->findFrame(reg, target);
186         assert( !pf );
187         pf = new ParseFrame(targFunc,_parse_data);
188         break;
189     case ParseFrame::UNPARSED:
190     case ParseFrame::CALL_BLOCKED:
191         pf = _parse_data->findFrame(reg, target);
192         if ( !pf ) {
193             fprintf(stderr,"ERROR: no function frame at %lx for frame "
194                     "that should exist, can't add edge; status=%d\n",
195                     target, exist);
196             return NULL;
197         }
198         break;
199     default:
200         assert(0);
201     }
202         
203     // make a temp edge
204     const Function::blocklist & ret_blks = tamperFunc->returnBlocks();
205     for (Function::blocklist::const_iterator bit = ret_blks.begin(); 
206          bit != ret_blks.end(); 
207          bit++)
208     {
209         Edge *edge = link_tempsink(*bit, CALL);
210
211         // create new bundle since we're not adding CALL,CALL_FT edge pairs
212         /* FIXME flag for Drew or Kevin:
213              this code (see original in comment below) does not add
214             the new work element to the worklist; I preserved this
215             behavior while changing code to fit the new, 
216             leak-free idiom, but I don't know whether this is intended
217             or a bug. You might want to wrap the call in a frame.pushWork.
218          */
219         (void)pf->mkWork(NULL,edge,target,true,true);
220         /*
221         ParseWorkBundle *bundle = new ParseWorkBundle();
222         pf->work_bundles.push_back(bundle);
223         bundle->add(
224             new ParseWorkElem(
225                 bundle,
226                 edge,
227                 target,
228                 true,
229                 true)
230           );
231         */
232     }
233
234     return pf;
235 }
236
237 // Param pf tampers with its stack by a relative or absolute amount. 
238 // In the first case, adjust CALL_FT target edge if there is one
239 // In the second case, add a new ParseFrame to the worklist or 
240 // trigger parsing in the target object. 
241 void
242 Parser::tamper_post_processing(vector<ParseFrame *> & work, ParseFrame *pf)
243 {
244     // tampers with stack by relative amount: 
245     // adjust CALL_FT target edge if there is one
246     for (unsigned widx = 0; 
247          pf->func->tampersStack() == TAMPER_REL && 
248          widx < work.size(); 
249          widx++) 
250     {
251         if (work[widx]->status() == ParseFrame::CALL_BLOCKED &&
252             pf->func == work[widx]->call_target) 
253         {
254             for (unsigned bidx=0 ; 
255                  bidx < work[widx]->work_bundles.size(); 
256                  bidx++) 
257             {
258                 const vector<ParseWorkElem*> &elems = 
259                     work[widx]->work_bundles[bidx]->elems();
260                 bool rightBundle = false;
261                 ParseWorkElem * ftEdge = NULL;
262                 for (unsigned eix=0; eix < elems.size(); eix++)
263                 {
264                     if (NULL == elems[eix]->edge()) 
265                     {
266                         continue;
267                     }
268                     if (elems[eix]->edge()->type() == CALL &&
269                         elems[eix]->target()==pf->func->addr())
270                     {
271                         rightBundle = true;
272                     }
273                     else if (elems[eix]->edge()->type() == CALL_FT)
274                     {
275                         ftEdge = elems[eix];
276                     }
277                 }
278                 if (rightBundle && ftEdge) 
279                 {
280                     ftEdge->setTarget(ftEdge->target() + 
281                                       pf->func->_tamper_addr);
282                 }
283             }
284         }
285     }
286     // create frame for TAMPER_ABS target in this object or parse
287     // in target object 
288     if (pf->func->tampersStack() == TAMPER_ABS) 
289     {
290         Address objLoad = 0;
291         CodeObject *targObj = NULL;
292         if (_pcb.absAddr(pf->func->_tamper_addr, 
293                          objLoad, 
294                          targObj)) 
295         {
296             if (targObj == &_obj) { // target is in this object, add frame
297                 ParseFrame * tf = getTamperAbsFrame(pf->func);
298                 if (tf && ! _parse_data->findFrame(tf->func->region(),
299                                                    tf->func->addr()) ) 
300                 {
301                     init_frame(*tf);
302                     frames.push_back(tf);
303                     _parse_data->record_frame(tf);
304                     _pcb.updateCodeBytes(pf->func->_tamper_addr - objLoad);
305                 }
306                 if (tf) {
307                     mal_printf("adding TAMPER_ABS target %lx frame\n", 
308                                pf->func->_tamper_addr);
309                     work.push_back(tf);
310                 }
311             }
312             else { // target is in another object, parse there
313                 mal_printf("adding TAMPER_ABS target %lx "
314                            "in separate object at %lx\n", 
315                            pf->func->_tamper_addr, objLoad);
316                 _obj.parse(pf->func->_tamper_addr - objLoad, true);
317             }
318         }
319         else {
320             mal_printf("discarding invalid TAMPER_ABS target %lx\n", 
321                        pf->func->_tamper_addr);
322         }
323     }
324 }
325
326
327 /*
328  * Extra handling for bad jump instructions
329  */
330 inline
331 void Parser::ProcessUnresBranchEdge(
332     ParseFrame & frame,
333     Block * cur,
334     InstructionAdapter_t & ah,
335     Address target)
336 {
337     ParseCallback::interproc_details det;
338     det.ibuf = (unsigned char*)
339        frame.func->isrc()->getPtrToInstruction(ah.getAddr());
340     det.isize = ah.getSize();
341     if (((Address)-1) == target) {
342         det.data.unres.target = 0;
343     } else {
344         det.data.unres.target = target;
345     }
346     det.type = ParseCallback::interproc_details::unresolved;
347     det.data.unres.target = target;
348
349     bool valid; Address addr;
350     boost::tie(valid, addr) = ah.getCFT();
351     if (!valid) {
352         det.data.unres.dynamic = true;
353         det.data.unres.absolute_address = true;
354     } else {
355         det.data.unres.dynamic = false;
356         det.data.unres.absolute_address = false;
357     }
358     _pcb.interproc_cf(frame.func,cur,ah.getAddr(),&det);
359 }
360
361 /*
362  * Extra handling of return instructions
363  */
364 inline
365 void Parser::ProcessReturnInsn(
366     ParseFrame & frame,
367     Block * cur,
368     InstructionAdapter_t & ah)
369 {
370     // returns always target the sink block
371     link(cur,_sink,RET,true);
372
373     ParseCallback::interproc_details det;
374     det.ibuf = (unsigned char*)
375         frame.func->isrc()->getPtrToInstruction(ah.getAddr());
376     det.isize = ah.getSize();
377     det.type = ParseCallback::interproc_details::ret;
378
379     _pcb.interproc_cf(frame.func, cur, ah.getAddr(),&det);
380 }
381
382
383 /*
384  * Extra handling for literal call instructions
385  * as well as other instructions treated as calls
386  */
387 inline
388 void Parser::ProcessCallInsn(
389     ParseFrame & frame,
390     Block * cur,
391     InstructionAdapter_t & ah,
392     bool isDynamic,
393     bool isAbsolute,
394     bool isResolved,
395     Address target)
396 {
397     ParseCallback::interproc_details det;
398     det.ibuf = (unsigned char*)
399        frame.func->isrc()->getPtrToInstruction(ah.getAddr());
400     det.isize = ah.getSize();
401     
402     if(ah.isCall()) {
403         det.data.call.absolute_address = isAbsolute;
404         det.data.call.dynamic_call = isDynamic;
405         det.data.call.target = target;
406         if (likely(isResolved))
407             det.type = ParseCallback::interproc_details::call; 
408         else
409             det.type = ParseCallback::interproc_details::unresolved; 
410     }
411     else
412         det.type = ParseCallback::interproc_details::branch_interproc;
413     
414     _pcb.interproc_cf(frame.func,cur,ah.getAddr(),&det);
415 }
416
417 void Parser::ProcessCFInsn(
418     ParseFrame & frame,
419     Block * cur,
420     InstructionAdapter_t & ah)
421 {
422     FuncReturnStatus insn_ret;
423     Edges_t edges_out;
424     ParseWorkBundle * bundle = NULL;
425
426
427     // terminate the block at this address
428     end_block(cur,ah);
429     
430     // Instruction adapter provides edge estimates from an instruction
431     parsing_printf("Getting edges\n");
432     ah.getNewEdges(edges_out, frame.func, cur, frame.num_insns, &plt_entries); 
433     parsing_printf("Returned %d edges\n", edges_out.size());
434     if (unlikely(_obj.defensiveMode() && !ah.isCall() && edges_out.size())) {
435         // only parse branch edges that align with existing blocks
436         bool hasUnalignedEdge = false;
437         set<CodeRegion*> tregs;
438         set<Block*> tblocks;
439         set<Address> insns_cur;
440         for(Edges_t::iterator curEdge = edges_out.begin();
441             !hasUnalignedEdge && curEdge != edges_out.end(); 
442             ++curEdge)
443         {
444             if (cur->end() <= curEdge->first) {
445               parsing_printf("%s[%d] skipping edge\n", FILE__, __LINE__);
446                 continue;
447             }
448             _obj.cs()->findRegions(curEdge->first,tregs);
449             if (!tregs.empty()) {
450                 _parse_data->findBlocks(*tregs.begin(),curEdge->first, tblocks);
451                 for (set<Block*>::iterator bit= tblocks.begin(); 
452                      !hasUnalignedEdge && bit != tblocks.end(); 
453                      bit++) 
454                 {
455                     if ((*bit)->end() <= cur->start() ||
456                         (*bit) == cur) 
457                     {
458                       parsing_printf("%s[%d] skipping edge\n", FILE__, __LINE__);
459                         continue;
460                     }
461                     set<Address> insns_tblk;
462                     getBlockInsns(**bit,insns_tblk);
463                     if (insns_cur.empty()) {
464                         getBlockInsns(*cur,insns_cur);
465                     }
466                     if ((*bit)->start() < cur->start()) {
467                         if (insns_tblk.end() == insns_tblk.find(cur->start())) {
468                             hasUnalignedEdge = true;
469                         } 
470                     } else if (insns_cur.end() == insns_cur.find((*bit)->start())) {
471                         hasUnalignedEdge = true;
472                     }
473                     if (hasUnalignedEdge) {
474                         mal_printf("Found unaligned blocks [%lx %lx) [%lx %lx), adding abruptEnd "
475                                    "point and killing out edges\n", cur->start(), cur->end(), 
476                                    (*bit)->start(), (*bit)->end());
477                     }
478                 }
479             }
480         }
481         if (true == hasUnalignedEdge) {
482           parsing_printf("Has unaligned edge, clearing and treating as abrupt end\n");
483             edges_out.clear();
484             ParseCallback::default_details det(
485                 (unsigned char*) cur->region()->getPtrToInstruction(cur->lastInsnAddr()),
486                 cur->end() - cur->lastInsnAddr(),
487                 true);
488             _pcb.abruptEnd_cf(cur->lastInsnAddr(),cur,&det);
489         }
490     }
491
492     insn_ret = ah.getReturnStatus(frame.func,frame.num_insns); 
493
494     // Update function return status if possible
495     if(unlikely(insn_ret != UNSET && frame.func->_rs < RETURN))
496         frame.func->set_retstatus(insn_ret);
497
498     // Return instructions need extra processing
499     if(insn_ret == RETURN)
500        ProcessReturnInsn(frame,cur,ah);
501
502     bool dynamic_call = ah.isDynamicCall();
503     bool absolute_call = ah.isAbsoluteCall();
504     // unresolved is true for indirect calls, unresolved indirect branches, 
505     // and later on is set set to true for transfers to bad addresses
506     bool has_unres = ah.hasUnresolvedControlFlow(frame.func,frame.num_insns);
507
508     parsing_printf("\t\t%d edges:\n",edges_out.size());
509     for(Edges_t::iterator curEdge = edges_out.begin();
510         curEdge != edges_out.end(); ++curEdge)
511     {
512         Edge * newedge = NULL;
513         bool resolvable_edge = true;
514         bool tailcall = false;
515
516         if(!is_code(frame.func,curEdge->first) &&
517            !HASHDEF(plt_entries,curEdge->first))
518         {
519             if(curEdge->second != NOEDGE || !dynamic_call) {
520                 has_unres = true;
521                 resolvable_edge = false;
522                 if (curEdge->second != -1 && _obj.defensiveMode()) 
523                     mal_printf("bad edge target at %lx type %d\n",
524                                curEdge->first, curEdge->second);
525             }
526         }
527
528         /*
529          * Call case 
530          */ 
531         if(curEdge->second == NOEDGE)
532         {
533             // call callback
534             resolvable_edge = resolvable_edge && !dynamic_call;
535             ProcessCallInsn(frame,cur,ah,dynamic_call,
536                 absolute_call,resolvable_edge,curEdge->first);
537
538             tailcall = !dynamic_call && 
539                ah.isTailCall(frame.func, CALL, frame.num_insns);
540             if(resolvable_edge) {
541                 if (tailcall) {
542                     newedge = link_tempsink(cur,DIRECT);
543                 } else newedge = link_tempsink(cur,CALL);
544             }
545             else { 
546                 if (tailcall) {
547                     newedge = link(cur,_sink,INDIRECT,true);
548                 }
549                 else newedge = link(cur,_sink,CALL,true);
550             }
551             if(!ah.isCall()) {
552                parsing_printf("Setting edge 0x%lx (0x%lx/0x%lx) to interproc\n",
553                               newedge,
554                               newedge->src()->start(), 
555                               newedge->trg()->start());
556                 newedge->_type._interproc = true;
557             }
558         }
559         /*
560          * All other edge types are handled identically
561          */
562         else
563         {
564             if(resolvable_edge) {
565                 newedge = link_tempsink(cur,curEdge->second);
566             }
567             else
568                 newedge = link(cur,_sink,curEdge->second,true);
569         }
570
571         if (ah.isTailCall(frame.func, curEdge->second, frame.num_insns)) {
572            parsing_printf("Setting edge 0x%lx (0x%lx/0x%lx) to interproc (tail call)\n",
573                           newedge,
574                           newedge->src()->start(),
575                           newedge->trg()->start());
576            newedge->_type._interproc = true;
577         }
578
579         if(!bundle) {
580             bundle = new ParseWorkBundle();
581             frame.work_bundles.push_back(bundle);
582         }
583
584         verbose_log(ah.getAddr(),curEdge);
585
586         ParseWorkElem * we = 
587           bundle->add(
588             new ParseWorkElem(
589                 bundle,
590                 newedge,
591                 curEdge->first,
592                 resolvable_edge,
593                 tailcall)
594           );
595
596         // We will not attempt to further process
597         // unresolvable edges; they stay sinks
598         if(resolvable_edge) {
599             parsing_printf("[%s:%d] pushing %lx onto worklist\n",
600                 FILE__,__LINE__,we->target());
601             frame.pushWork(we);
602
603             if (unlikely(_obj.defensiveMode())) {
604                 // update the underlying code bytes for CF targets
605                 if (  CALL == curEdge->second
606                       || DIRECT == curEdge->second
607                       || COND_TAKEN == curEdge->second
608                                           || NOEDGE == curEdge->second)
609                 {
610
611                     _pcb.updateCodeBytes(curEdge->first);
612                 }
613             }
614         } 
615         else if( unlikely(_obj.defensiveMode() && NOEDGE != curEdge->second) )
616         {   
617             ProcessUnresBranchEdge(frame, cur, ah, curEdge->first);
618         }
619     }
620
621     if (unlikely(has_unres && edges_out.empty())) {
622         link(cur, _sink, INDIRECT, true);
623         ProcessUnresBranchEdge(frame, cur, ah, -1);
624          }
625
626     if(ah.isDelaySlot())
627         ah.advance();
628
629         if(!frame.func->_cleans_stack && ah.cleansStack()) {
630         frame.func->_cleans_stack = true;
631         }
632 }