Convince Clang that it's possible for an EdgeTypeEnum to be -1
[dyninst.git] / parseAPI / src / ParserDetails.C
1 /*
2  * See the dyninst/COPYRIGHT file for copyright information.
3  * 
4  * We provide the Paradyn Tools (below described as "Paradyn")
5  * on an AS IS basis, and do not warrant its validity or performance.
6  * We reserve the right to update, modify, or discontinue this
7  * software at any time.  We shall have no obligation to supply such
8  * updates or modifications or any other form of support to you.
9  * 
10  * By your use of Paradyn, you understand and agree that we (or any
11  * other person or entity with proprietary rights in Paradyn) are
12  * under no obligation to provide either maintenance services,
13  * update services, notices of latent defects, or correction of
14  * defects for Paradyn.
15  * 
16  * This library is free software; you can redistribute it and/or
17  * modify it under the terms of the GNU Lesser General Public
18  * License as published by the Free Software Foundation; either
19  * version 2.1 of the License, or (at your option) any later version.
20  * 
21  * This library is distributed in the hope that it will be useful,
22  * but WITHOUT ANY WARRANTY; without even the implied warranty of
23  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
24  * Lesser General Public License for more details.
25  * 
26  * You should have received a copy of the GNU Lesser General Public
27  * License along with this library; if not, write to the Free Software
28  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
29  */
30 #include "CodeObject.h"
31 #include "CFG.h"
32 #include "ParseCallback.h"
33
34 #include "Parser.h"
35 #include "ParserDetails.h"
36 #include "ParseData.h"
37 #include "debug_parse.h"
38 #include "util.h"
39
40 using namespace std;
41 using namespace Dyninst;
42 using namespace Dyninst::ParseAPI;
43
44 typedef std::pair< Address, EdgeTypeEnum > edge_pair_t;
45 typedef vector< edge_pair_t > Edges_t;
46
47 #define VERBOSE_EDGE_LOG
48
49 namespace {
50
51 inline void
52 #ifndef VERBOSE_EDGE_LOG                        
53 verbose_log(Address /* currAddr */, Edges_t::iterator & /* curEdge */)
54 {
55 #else
56 verbose_log(Address currAddr, Edges_t::iterator & curEdge)
57 {
58   using namespace Dyninst::ParseAPI;
59   
60     switch(curEdge->second)
61     {
62         case CALL:
63             parsing_printf("%s[%d]: adding call edge %x->%x\n",
64                            FILE__, __LINE__, currAddr, curEdge->first);
65             break;
66         case CALL_FT:
67             parsing_printf("%s[%d]: adding function fallthrough edge %x->%x\n",
68                            FILE__, __LINE__, currAddr, curEdge->first);
69             break;
70         case FALLTHROUGH:
71             parsing_printf("%s[%d]: adding fallthrough edge %x->%x\n",
72                            FILE__, __LINE__, currAddr, curEdge->first);
73             break;
74         case COND_TAKEN:
75             parsing_printf("%s[%d]: adding conditional taken edge %x->%x\n",
76                            FILE__, __LINE__, currAddr, curEdge->first);
77             break;
78         case COND_NOT_TAKEN:
79             parsing_printf("%s[%d]: adding conditional not taken edge %x->%x\n",
80                            FILE__, __LINE__, currAddr, curEdge->first);
81             break;
82         case INDIRECT:
83             parsing_printf("%s[%d]: adding indirect edge %x->%x\n",
84                            FILE__, __LINE__, currAddr, curEdge->first);
85             break;
86         case DIRECT:
87             parsing_printf("%s[%d]: adding direct edge %x->%x\n",
88                            FILE__, __LINE__, currAddr, curEdge->first);
89             break;
90         case CATCH:
91             parsing_printf("%s[%d]: adding catch block edge %x->%x\n",
92                            FILE__, __LINE__, currAddr, curEdge->first);
93             break;
94         default:
95             parsing_printf("%s[%d]: adding unknown edge type %d edge %x->%x\n",
96                            FILE__, __LINE__, curEdge->second, currAddr, curEdge->first);
97             break;
98     }
99 #endif // VERBOSE_EDGE_LOG
100 }
101 } // anonymous namespace
102
103
104 static void 
105 getBlockInsns(Block &blk, std::set<Address> &addrs)
106 {
107     unsigned bufSize = blk.size();
108     using namespace InstructionAPI;
109     const unsigned char* bufferBegin = (const unsigned char *)
110         (blk.obj()->cs()->getPtrToInstruction(blk.start()));
111     InstructionDecoder dec = InstructionDecoder
112         (bufferBegin, bufSize, blk.region()->getArch());
113     InstructionAdapter_t ah(dec, blk.start(), blk.obj(), blk.region(), blk.obj()->cs(), &blk);
114
115         for (; ah.getAddr() < blk.end(); ah.advance()) {
116         addrs.insert(ah.getAddr());
117     } 
118 }
119
120 /* called in defensive mode to create parseFrames at tampered addresses 
121    for functions that return TAMPER_ABS. */
122 ParseFrame * 
123 Parser::getTamperAbsFrame(Function *tamperFunc)
124 {
125     assert(TAMPER_ABS == tamperFunc->tampersStack());
126     Function * targFunc = NULL;
127
128     // get the binary's load address and subtract it
129
130     CodeSource *cs = tamperFunc->obj()->cs();
131     set<CodeRegion*> targRegs;
132
133     Address loadAddr = cs->loadAddress();
134     Address target = tamperFunc->_tamper_addr - loadAddr;
135     if (loadAddr < tamperFunc->_tamper_addr) {
136         cs->findRegions(target, targRegs);
137     }
138     if (targRegs.empty()) {
139         mal_printf("ERROR: could not create function at tamperAbs "
140                    "addr, which is in another object %lx\n",target);
141         return NULL;
142     }
143         
144     assert(1 == targRegs.size()); // we don't do analyze stack tampering on 
145                                   // platforms that use archive files
146     targFunc = _parse_data->get_func
147         (*(targRegs.begin()), 
148          target, 
149          tamperFunc->src());
150
151     if (!targFunc) {
152         targFunc = _parse_data->get_func(*(targRegs.begin()),target,RT);
153     }
154
155     if(!targFunc) {
156         mal_printf("ERROR: could not create function at tamperAbs addr %lx\n",
157                    tamperFunc->_tamper_addr);
158         return NULL;
159     }
160
161     ParseFrame * pf = NULL;
162     CodeRegion *reg = targFunc->region();
163
164     ParseFrame::Status exist = _parse_data->frameStatus(reg, target);
165     switch(exist) {
166     case ParseFrame::FRAME_ERROR:
167     case ParseFrame::PROGRESS:
168         fprintf(stderr,"ERROR: function frame at %lx in bad state, can't "
169                 "add edge; status=%d\n",target, exist);
170         return NULL;
171         break;
172     case ParseFrame::PARSED:
173         fprintf(stderr,"ERROR: function frame at %lx already parsed, can't "
174                 "add edge; status=%d\n",target, exist);
175         return NULL;
176         break;
177     case ParseFrame::BAD_LOOKUP:
178         // create new frame
179         pf = _parse_data->findFrame(reg, target);
180         assert( !pf );
181         pf = new ParseFrame(targFunc,_parse_data);
182         break;
183     case ParseFrame::UNPARSED:
184     case ParseFrame::CALL_BLOCKED:
185         pf = _parse_data->findFrame(reg, target);
186         if ( !pf ) {
187             fprintf(stderr,"ERROR: no function frame at %lx for frame "
188                     "that should exist, can't add edge; status=%d\n",
189                     target, exist);
190             return NULL;
191         }
192         break;
193     default:
194         assert(0);
195     }
196         
197     // make a temp edge
198     const Function::blocklist & ret_blks = tamperFunc->returnBlocks();
199     for (Function::blocklist::const_iterator bit = ret_blks.begin(); 
200          bit != ret_blks.end(); 
201          bit++)
202     {
203         Edge *edge = link_tempsink(*bit, CALL);
204
205         // create new bundle since we're not adding CALL,CALL_FT edge pairs
206         /* FIXME flag for Drew or Kevin:
207              this code (see original in comment below) does not add
208             the new work element to the worklist; I preserved this
209             behavior while changing code to fit the new, 
210             leak-free idiom, but I don't know whether this is intended
211             or a bug. You might want to wrap the call in a frame.pushWork.
212          */
213         (void)pf->mkWork(NULL,edge,target,true,true);
214         /*
215         ParseWorkBundle *bundle = new ParseWorkBundle();
216         pf->work_bundles.push_back(bundle);
217         bundle->add(
218             new ParseWorkElem(
219                 bundle,
220                 edge,
221                 target,
222                 true,
223                 true)
224           );
225         */
226     }
227
228     return pf;
229 }
230
231 // Param pf tampers with its stack by a relative or absolute amount. 
232 // In the first case, adjust CALL_FT target edge if there is one
233 // In the second case, add a new ParseFrame to the worklist or 
234 // trigger parsing in the target object. 
235 void
236 Parser::tamper_post_processing(vector<ParseFrame *> & work, ParseFrame *pf)
237 {
238     // tampers with stack by relative amount: 
239     // adjust CALL_FT target edge if there is one
240     for (unsigned widx = 0; 
241          pf->func->tampersStack() == TAMPER_REL && 
242          widx < work.size(); 
243          widx++) 
244     {
245         if (work[widx]->status() == ParseFrame::CALL_BLOCKED &&
246             pf->func == work[widx]->call_target) 
247         {
248             for (unsigned bidx=0 ; 
249                  bidx < work[widx]->work_bundles.size(); 
250                  bidx++) 
251             {
252                 const vector<ParseWorkElem*> &elems = 
253                     work[widx]->work_bundles[bidx]->elems();
254                 bool rightBundle = false;
255                 ParseWorkElem * ftEdge = NULL;
256                 for (unsigned eix=0; eix < elems.size(); eix++)
257                 {
258                     if (NULL == elems[eix]->edge()) 
259                     {
260                         continue;
261                     }
262                     if (elems[eix]->edge()->type() == CALL &&
263                         elems[eix]->target()==pf->func->addr())
264                     {
265                         rightBundle = true;
266                     }
267                     else if (elems[eix]->edge()->type() == CALL_FT)
268                     {
269                         ftEdge = elems[eix];
270                     }
271                 }
272                 if (rightBundle && ftEdge) 
273                 {
274                     ftEdge->setTarget(ftEdge->target() + 
275                                       pf->func->_tamper_addr);
276                 }
277             }
278         }
279     }
280     // create frame for TAMPER_ABS target in this object or parse
281     // in target object 
282     if (pf->func->tampersStack() == TAMPER_ABS) 
283     {
284         Address objLoad = 0;
285         CodeObject *targObj = NULL;
286         if (_pcb.absAddr(pf->func->_tamper_addr, 
287                          objLoad, 
288                          targObj)) 
289         {
290             if (targObj == &_obj) { // target is in this object, add frame
291                 ParseFrame * tf = getTamperAbsFrame(pf->func);
292                 if (tf && ! _parse_data->findFrame(tf->func->region(),
293                                                    tf->func->addr()) ) 
294                 {
295                     init_frame(*tf);
296                     frames.push_back(tf);
297                     _parse_data->record_frame(tf);
298                     _pcb.updateCodeBytes(pf->func->_tamper_addr - objLoad);
299                 }
300                 if (tf) {
301                     mal_printf("adding TAMPER_ABS target %lx frame\n", 
302                                pf->func->_tamper_addr);
303                     work.push_back(tf);
304                 }
305             }
306             else { // target is in another object, parse there
307                 mal_printf("adding TAMPER_ABS target %lx "
308                            "in separate object at %lx\n", 
309                            pf->func->_tamper_addr, objLoad);
310                 _obj.parse(pf->func->_tamper_addr - objLoad, true);
311             }
312         }
313         else {
314             mal_printf("discarding invalid TAMPER_ABS target %lx\n", 
315                        pf->func->_tamper_addr);
316         }
317     }
318 }
319
320
321 /*
322  * Extra handling for bad jump instructions
323  */
324 inline
325 void Parser::ProcessUnresBranchEdge(
326     ParseFrame & frame,
327     Block * cur,
328     InstructionAdapter_t & ah,
329     Address target)
330 {
331     ParseCallback::interproc_details det;
332     det.ibuf = (unsigned char*)
333        frame.func->isrc()->getPtrToInstruction(ah.getAddr());
334     det.isize = ah.getSize();
335     if (((Address)-1) == target) {
336         det.data.unres.target = 0;
337     } else {
338         det.data.unres.target = target;
339     }
340     det.type = ParseCallback::interproc_details::unresolved;
341     det.data.unres.target = target;
342
343     bool valid; Address addr;
344     boost::tie(valid, addr) = ah.getCFT();
345     if (!valid) {
346         det.data.unres.dynamic = true;
347         det.data.unres.absolute_address = true;
348     } else {
349         det.data.unres.dynamic = false;
350         det.data.unres.absolute_address = false;
351     }
352     _pcb.interproc_cf(frame.func,cur,ah.getAddr(),&det);
353 }
354
355 /*
356  * Extra handling of return instructions
357  */
358 inline
359 void Parser::ProcessReturnInsn(
360     ParseFrame & frame,
361     Block * cur,
362     InstructionAdapter_t & ah)
363 {
364     // returns always target the sink block
365     link(cur,_sink,RET,true);
366
367     ParseCallback::interproc_details det;
368     det.ibuf = (unsigned char*)
369         frame.func->isrc()->getPtrToInstruction(ah.getAddr());
370     det.isize = ah.getSize();
371     det.type = ParseCallback::interproc_details::ret;
372
373     _pcb.interproc_cf(frame.func, cur, ah.getAddr(),&det);
374 }
375
376
377 /*
378  * Extra handling for literal call instructions
379  * as well as other instructions treated as calls
380  */
381 inline
382 void Parser::ProcessCallInsn(
383     ParseFrame & frame,
384     Block * cur,
385     InstructionAdapter_t & ah,
386     bool isDynamic,
387     bool isAbsolute,
388     bool isResolved,
389     Address target)
390 {
391     ParseCallback::interproc_details det;
392     det.ibuf = (unsigned char*)
393        frame.func->isrc()->getPtrToInstruction(ah.getAddr());
394     det.isize = ah.getSize();
395     
396     if(ah.isCall()) {
397         det.data.call.absolute_address = isAbsolute;
398         det.data.call.dynamic_call = isDynamic;
399         det.data.call.target = target;
400         if (likely(isResolved))
401             det.type = ParseCallback::interproc_details::call; 
402         else
403             det.type = ParseCallback::interproc_details::unresolved; 
404     }
405     else
406         det.type = ParseCallback::interproc_details::branch_interproc;
407     
408     _pcb.interproc_cf(frame.func,cur,ah.getAddr(),&det);
409 }
410
411 void Parser::ProcessCFInsn(
412     ParseFrame & frame,
413     Block * cur,
414     InstructionAdapter_t & ah)
415 {
416     FuncReturnStatus insn_ret;
417     Edges_t edges_out;
418     ParseWorkBundle * bundle = NULL;
419
420
421     // terminate the block at this address
422     end_block(cur,ah);
423     
424     // Instruction adapter provides edge estimates from an instruction
425     parsing_printf("Getting edges\n");
426     ah.getNewEdges(edges_out, frame.func, cur, frame.num_insns, &plt_entries); 
427     parsing_printf("Returned %d edges\n", edges_out.size());
428     if (unlikely(_obj.defensiveMode() && !ah.isCall() && edges_out.size())) {
429         // only parse branch edges that align with existing blocks
430         bool hasUnalignedEdge = false;
431         set<CodeRegion*> tregs;
432         set<Block*> tblocks;
433         set<Address> insns_cur;
434         for(Edges_t::iterator curEdge = edges_out.begin();
435             !hasUnalignedEdge && curEdge != edges_out.end(); 
436             ++curEdge)
437         {
438             if (cur->end() <= curEdge->first) {
439               parsing_printf("%s[%d] skipping edge\n", FILE__, __LINE__);
440                 continue;
441             }
442             _obj.cs()->findRegions(curEdge->first,tregs);
443             if (!tregs.empty()) {
444                 _parse_data->findBlocks(*tregs.begin(),curEdge->first, tblocks);
445                 for (set<Block*>::iterator bit= tblocks.begin(); 
446                      !hasUnalignedEdge && bit != tblocks.end(); 
447                      bit++) 
448                 {
449                     if ((*bit)->end() <= cur->start() ||
450                         (*bit) == cur) 
451                     {
452                       parsing_printf("%s[%d] skipping edge\n", FILE__, __LINE__);
453                         continue;
454                     }
455                     set<Address> insns_tblk;
456                     getBlockInsns(**bit,insns_tblk);
457                     if (insns_cur.empty()) {
458                         getBlockInsns(*cur,insns_cur);
459                     }
460                     if ((*bit)->start() < cur->start()) {
461                         if (insns_tblk.end() == insns_tblk.find(cur->start())) {
462                             hasUnalignedEdge = true;
463                         } 
464                     } else if (insns_cur.end() == insns_cur.find((*bit)->start())) {
465                         hasUnalignedEdge = true;
466                     }
467                     if (hasUnalignedEdge) {
468                         mal_printf("Found unaligned blocks [%lx %lx) [%lx %lx), adding abruptEnd "
469                                    "point and killing out edges\n", cur->start(), cur->end(), 
470                                    (*bit)->start(), (*bit)->end());
471                     }
472                 }
473             }
474         }
475         if (true == hasUnalignedEdge) {
476           parsing_printf("Has unaligned edge, clearing and treating as abrupt end\n");
477             edges_out.clear();
478             ParseCallback::default_details det(
479                 (unsigned char*) cur->region()->getPtrToInstruction(cur->lastInsnAddr()),
480                 cur->end() - cur->lastInsnAddr(),
481                 true);
482             _pcb.abruptEnd_cf(cur->lastInsnAddr(),cur,&det);
483         }
484     }
485
486     insn_ret = ah.getReturnStatus(frame.func,frame.num_insns); 
487
488     // Update function return status if possible
489     if(unlikely(insn_ret != UNSET && frame.func->_rs < RETURN))
490         frame.func->set_retstatus(insn_ret);
491
492     // Return instructions need extra processing
493     if(insn_ret == RETURN)
494        ProcessReturnInsn(frame,cur,ah);
495
496     bool dynamic_call = ah.isDynamicCall();
497     bool absolute_call = ah.isAbsoluteCall();
498     // unresolved is true for indirect calls, unresolved indirect branches, 
499     // and later on is set set to true for transfers to bad addresses
500     bool has_unres = ah.hasUnresolvedControlFlow(frame.func,frame.num_insns);
501
502     parsing_printf("\t\t%d edges:\n",edges_out.size());
503     for(Edges_t::iterator curEdge = edges_out.begin();
504         curEdge != edges_out.end(); ++curEdge)
505     {
506         Edge * newedge = NULL;
507         bool resolvable_edge = true;
508         bool tailcall = false;
509
510         if(!is_code(frame.func,curEdge->first) &&
511            !HASHDEF(plt_entries,curEdge->first))
512         {
513             if(curEdge->second != NOEDGE || !dynamic_call) {
514                 has_unres = true;
515                 resolvable_edge = false;
516                 if ((int)curEdge->second != -1 && _obj.defensiveMode()) 
517                     mal_printf("bad edge target at %lx type %d\n",
518                                curEdge->first, curEdge->second);
519             }
520         }
521
522         /*
523          * Call case 
524          */ 
525         if(curEdge->second == NOEDGE)
526         {
527             // call callback
528             resolvable_edge = resolvable_edge && !dynamic_call;
529             ProcessCallInsn(frame,cur,ah,dynamic_call,
530                 absolute_call,resolvable_edge,curEdge->first);
531
532             tailcall = !dynamic_call && 
533                ah.isTailCall(frame.func, CALL, frame.num_insns);
534             if(resolvable_edge) {
535                 if (tailcall) {
536                     newedge = link_tempsink(cur,DIRECT);
537                 } else newedge = link_tempsink(cur,CALL);
538             }
539             else { 
540                 if (tailcall) {
541                     newedge = link(cur,_sink,INDIRECT,true);
542                 }
543                 else newedge = link(cur,_sink,CALL,true);
544             }
545             if(!ah.isCall()) {
546                parsing_printf("Setting edge 0x%lx (0x%lx/0x%lx) to interproc\n",
547                               newedge,
548                               newedge->src()->start(), 
549                               newedge->trg()->start());
550                 newedge->_type._interproc = true;
551             }
552         }
553         /*
554          * All other edge types are handled identically
555          */
556         else
557         {
558             if(resolvable_edge) {
559                 newedge = link_tempsink(cur,curEdge->second);
560             }
561             else
562                 newedge = link(cur,_sink,curEdge->second,true);
563         }
564
565         if (ah.isTailCall(frame.func, curEdge->second, frame.num_insns)) {
566            parsing_printf("Setting edge 0x%lx (0x%lx/0x%lx) to interproc (tail call)\n",
567                           newedge,
568                           newedge->src()->start(),
569                           newedge->trg()->start());
570            newedge->_type._interproc = true;
571         }
572
573         if(!bundle) {
574             bundle = new ParseWorkBundle();
575             frame.work_bundles.push_back(bundle);
576         }
577
578         verbose_log(ah.getAddr(),curEdge);
579
580         ParseWorkElem * we = 
581           bundle->add(
582             new ParseWorkElem(
583                 bundle,
584                 newedge,
585                 curEdge->first,
586                 resolvable_edge,
587                 tailcall)
588           );
589
590         // We will not attempt to further process
591         // unresolvable edges; they stay sinks
592         if(resolvable_edge) {
593             parsing_printf("[%s:%d] pushing %lx onto worklist\n",
594                 FILE__,__LINE__,we->target());
595             frame.pushWork(we);
596
597             if (unlikely(_obj.defensiveMode())) {
598                 // update the underlying code bytes for CF targets
599                 if (  CALL == curEdge->second
600                       || DIRECT == curEdge->second
601                       || COND_TAKEN == curEdge->second
602                                           || NOEDGE == curEdge->second)
603                 {
604
605                     _pcb.updateCodeBytes(curEdge->first);
606                 }
607             }
608         } 
609         else if( unlikely(_obj.defensiveMode() && NOEDGE != curEdge->second) )
610         {   
611             ProcessUnresBranchEdge(frame, cur, ah, curEdge->first);
612         }
613     }
614
615     if (unlikely(has_unres && edges_out.empty())) {
616         link(cur, _sink, INDIRECT, true);
617         ProcessUnresBranchEdge(frame, cur, ah, -1);
618          }
619
620     if(ah.isDelaySlot())
621         ah.advance();
622
623         if(!frame.func->_cleans_stack && ah.cleansStack()) {
624         frame.func->_cleans_stack = true;
625         }
626 }