Removes cyclic dependency between symEval and parseAPI
[dyninst.git] / parseAPI / src / IA_x86.C
1 /*
2  * Copyright (c) 1996-2009 Barton P. Miller
3  * 
4  * We provide the Paradyn Parallel Performance Tools (below
5  * described as "Paradyn") on an AS IS basis, and do not warrant its
6  * validity or performance.  We reserve the right to update, modify,
7  * or discontinue this software at any time.  We shall have no
8  * obligation to supply such updates or modifications or any other
9  * form of support to you.
10  * 
11  * By your use of Paradyn, you understand and agree that we (or any
12  * other person or entity with proprietary rights in Paradyn) are
13  * under no obligation to provide either maintenance services,
14  * update services, notices of latent defects, or correction of
15  * defects for Paradyn.
16  * 
17  * This library is free software; you can redistribute it and/or
18  * modify it under the terms of the GNU Lesser General Public
19  * License as published by the Free Software Foundation; either
20  * version 2.1 of the License, or (at your option) any later version.
21  * 
22  * This library is distributed in the hope that it will be useful,
23  * but WITHOUT ANY WARRANTY; without even the implied warranty of
24  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
25  * Lesser General Public License for more details.
26  * 
27  * You should have received a copy of the GNU Lesser General Public
28  * License along with this library; if not, write to the Free Software
29  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
30  */
31
32
33 #include "IA_IAPI.h"
34
35 #include "Register.h"
36 #include "Dereference.h"
37 #include "Immediate.h"
38 #include "BinaryFunction.h"
39 #include "debug_parse.h"
40
41 #include <deque>
42
43 using namespace Dyninst;
44 using namespace InstructionAPI;
45 using namespace Dyninst::InsnAdapter;
46 using namespace Dyninst::ParseAPI;
47
48 class zeroAllGPRegisters : public InstructionAPI::Visitor
49 {
50     public:
51     zeroAllGPRegisters(Address ip) : defined(true), m_ip(ip) {}
52     virtual ~zeroAllGPRegisters() {}
53     bool defined;
54     std::deque<long> results;
55     Address m_ip;
56     long getResult() {
57         if(results.empty()) return 0;
58         return results.front();
59     }
60     bool isDefined() {
61         return defined && (results.size() == 1);
62     }
63     virtual void visit(BinaryFunction* b)
64     {
65         if(!defined) return;
66         long arg1 = results.back();
67         results.pop_back();
68         long arg2 = results.back();
69         results.pop_back();
70         if(b->isAdd())
71         {
72             results.push_back(arg1+arg2);
73         }
74         else if(b->isMultiply())
75         {
76             results.push_back(arg1*arg2);
77         }
78         else
79         {
80             defined = false;
81         }
82     }
83     virtual void visit(Immediate* i)
84     {
85         if(!defined) return;
86         results.push_back(i->eval().convert<long>());
87     }
88     virtual void visit(RegisterAST* r)
89     {
90         if(!defined) return;
91         if(r->getID() == x86::eip ||
92            r->getID() == x86_64::eip ||
93            r->getID() == x86_64::rip)
94         {
95             results.push_back(m_ip);
96             return;
97         }
98         results.push_back(0);
99     }
100     virtual void visit(Dereference* )
101     {
102         defined = false;
103     }
104     
105 };
106
107 bool IA_IAPI::isFrameSetupInsn(Instruction::Ptr i) const
108 {
109     if(i->getOperation().getID() == e_mov)
110     {
111         if(i->isRead(stackPtr[_isrc->getArch()]) &&
112            i->isWritten(framePtr[_isrc->getArch()]))
113         {
114             return true;
115         }
116     }
117     return false;
118 }
119
120 bool IA_IAPI::isNop() const
121 {
122     Instruction::Ptr ci = curInsn();
123
124     // TODO: add LEA no-ops
125     assert(ci);
126     if(ci->getOperation().getID() == e_nop)
127         return true;
128     if(ci->getOperation().getID() == e_lea)
129     {
130         std::set<Expression::Ptr> memReadAddr;
131         ci->getMemoryReadOperands(memReadAddr);
132         std::set<RegisterAST::Ptr> writtenRegs;
133         ci->getWriteSet(writtenRegs);
134         
135         if(memReadAddr.size() == 1 && writtenRegs.size() == 1)
136         {
137             if(**(memReadAddr.begin()) == **(writtenRegs.begin()))
138             {
139                 return true;
140             }
141             // TODO: check for zero displacement--do we want to bind here?
142         }
143     }
144     return false;
145 }
146
147 bool IA_IAPI::isMovAPSTable(std::vector<std::pair< Address, EdgeTypeEnum > >& outEdges) const
148 {
149     /**
150     * AMD-64 gcc emits a re-accuring idiom of:
151      *         jmpq   *%r8
152      *         movaps %xmm7,-0xf(%rax)
153      *         movaps %xmm6,-0x1f(%rax)
154      *         movaps %xmm5,-0x2f(%rax)
155      *         movaps %xmm4,-0x3f(%rax)
156      *         movaps %xmm3,-0x4f(%rax)
157      *         movaps %xmm2,-0x5f(%rax)
158      *         movaps %xmm1,-0x6f(%rax)
159      *         movaps %xmm0,-0x7f(%rax)
160      *         <other>
161      *
162      * The jump register is calculated in such a way that it'll be difficult
163      * for our analysis to figure it out.  Instead we'll recognize the pattern
164      * of the 'movaps'.  Note that the following instruction is also a valid jump
165      * target
166      **/
167     parsing_printf("\tChecking for movaps table at 0x%lx...\n", current);
168     std::set<Address> found;
169     const unsigned char *bufferBegin =
170             (const unsigned char *)_isrc->getPtrToInstruction(current);
171     if( bufferBegin == NULL ) {
172         parsing_printf("%s[%d]: failed to get pointer to instruction by offset\n",
173                        FILE__, __LINE__);
174         return false;
175     }
176
177     unsigned int size = (_cr->offset() + _cr->length()) - current;
178     InstructionDecoder d(bufferBegin, size, _isrc->getArch());
179     Address cur = current;
180     unsigned last_insn_size = 0;
181     InstructionAPI::Instruction::Ptr i = d.decode();
182     cur += i->size();
183     InstructionAPI::Instruction::Ptr insn;
184     while (NULL != (insn = d.decode())) {
185         //All insns in sequence are movaps
186         parsing_printf("\t\tChecking instruction %s\n", insn->format().c_str());
187         if (insn->getOperation().getID() != e_movapd &&
188             insn->getOperation().getID() != e_movaps)
189         {
190             break;
191         }
192         //All insns are same size
193         if (last_insn_size == 0)
194             last_insn_size = insn->size();
195         else if (last_insn_size != insn->size())
196             break;
197
198         found.insert(cur);
199
200         cur += insn->size();
201     }
202     if (found.size() == 8) {
203         found.insert(cur);
204         //It's a match
205         for (std::set<Address>::iterator i=found.begin(); i != found.end(); i++) {
206             outEdges.push_back(std::make_pair(*i, INDIRECT));
207         }
208         parsing_printf("\tfound\n");
209         return true;
210     }
211     parsing_printf("\tnot found (%d insns)\n", found.size());
212     return false;
213 }
214
215 bool IA_IAPI::isTableInsn(Instruction::Ptr i) const
216 {
217     if(i->getOperation().getID() == e_mov && i->readsMemory() && !i->writesMemory())
218     {
219         return true;
220     }
221     if(i->getOperation().getID() == e_lea)
222     {
223         return true;
224     }
225     return false;
226 }
227         
228 std::map<Address, Instruction::Ptr>::const_iterator IA_IAPI::findTableInsn() const
229 {
230     // Check whether the jump is our table insn!
231     Expression::Ptr cft = curInsn()->getControlFlowTarget();
232     if(cft)
233     {
234         std::vector<InstructionAST::Ptr> tmp;
235         cft->getChildren(tmp);
236         if(tmp.size() == 1)
237         {
238             Expression::Ptr cftAddr = dyn_detail::boost::dynamic_pointer_cast<Expression>(tmp[0]);
239             zeroAllGPRegisters z(current);
240             cftAddr->apply(&z);
241             parsing_printf("\tChecking indirect jump %s for table insn\n", curInsn()->format().c_str());
242             if(z.isDefined() && z.getResult())
243             {
244                 parsing_printf("\tAddress in jump\n");
245                 return allInsns.find(current);
246             }
247         }
248     }
249     std::map<Address, Instruction::Ptr>::const_iterator c =
250             allInsns.find(current);
251     while(!isTableInsn(c->second) && c != allInsns.begin())
252     {
253         --c;
254     }
255     if(isTableInsn(c->second))
256     {
257         return c;
258     }
259     return allInsns.end();
260 }
261         
262 // This should only be called on a known indirect branch...
263 bool IA_IAPI::parseJumpTable(Block* currBlk,
264     std::vector<std::pair< Address, EdgeTypeEnum> >& outEdges) const
265 {
266     if(isIPRelativeBranch())
267     {
268         return false;
269     }
270
271     // FIXME we should only care about intraprocedural edges
272     Block::edgelist & sourceEdges = currBlk->sources();
273     if(sourceEdges.empty())
274         return false;
275     
276     if(isMovAPSTable(outEdges))
277     {
278         return true;
279     }
280     bool foundJCCAlongTaken = false;
281     Instruction::Ptr tableInsn;
282     Address tableInsnAddr;
283     std::map<Address, Instruction::Ptr>::const_iterator tableLoc = findTableInsn();
284     if(tableLoc == allInsns.end())
285     {
286         parsing_printf("\tunable to find table insn\n");
287         return false;
288     }
289     tableInsnAddr = tableLoc->first;
290     tableInsn = tableLoc->second;
291     Instruction::Ptr maxSwitchInsn, branchInsn;
292     boost::tie(maxSwitchInsn, branchInsn, foundJCCAlongTaken) = findMaxSwitchInsn(currBlk);
293     if(!maxSwitchInsn || !branchInsn)
294     {
295         parsing_printf("\tunable to fix max switch size\n");
296         return false;
297     }
298     Address thunkOffset;
299     Address thunkInsnAddr;
300     boost::tie(thunkInsnAddr, thunkOffset) = findThunkAndOffset(currBlk);
301     if(thunkInsnAddr != 0)
302     {
303         std::map<Address, Instruction::Ptr>::const_iterator thunkLoc =
304                 allInsns.find(thunkInsnAddr);
305         if(thunkLoc != allInsns.end())
306         {
307             tableLoc = thunkLoc;
308             tableInsnAddr = thunkInsnAddr;
309             tableInsn = thunkLoc->second;
310         }
311     }
312     parsing_printf("\ttableInsn %s at 0x%lx\n",tableInsn->format().c_str(), tableInsnAddr);
313     if(thunkOffset) {
314         parsing_printf("\tThunk-calculated table base address: 0x%lx\n",
315                        thunkOffset);
316     }
317     unsigned tableSize = 0, tableStride = 0;
318     bool ok = computeTableBounds(maxSwitchInsn, branchInsn, tableInsn, foundJCCAlongTaken,
319                                  tableSize, tableStride);
320     if(!ok)
321     {
322         return false;
323     }
324     std::map<Address, Instruction::Ptr>::const_iterator cur = allInsns.find(current);
325     
326     while(tableLoc != cur)
327     {
328         tableLoc++;
329         if(tableLoc->second->getOperation().getID() == e_lea)
330         {
331             parsing_printf("\tchecking instruction %s at 0x%lx for IP-relative LEA\n", tableLoc->second->format().c_str(),
332                            tableLoc->first);
333             Expression::Ptr IPRelAddr = tableLoc->second->getOperand(1).getValue();
334             IPRelAddr->bind(thePC[_isrc->getArch()].get(), Result(s64, tableLoc->first + tableLoc->second->size()));
335             Result iprel = IPRelAddr->eval();
336             if(iprel.defined)
337             {
338                 parsing_printf("\trevising tableInsn to %s at 0x%lx\n",tableLoc->second->format().c_str(), tableLoc->first);
339                 tableInsn = tableLoc->second;
340                 tableInsnAddr = tableLoc->first;
341             }
342
343         }
344         else
345         {
346             parsing_printf("\tChecking for sign-extending mov at 0x%lx...\n", tableLoc->first);
347             if(tableLoc->second->getOperation().getID() == e_movsxd ||
348                tableLoc->second->getOperation().getID() == e_movsx)
349             {
350                 std::set<Expression::Ptr> movsxReadAddr;
351                 tableLoc->second->getMemoryReadOperands(movsxReadAddr);
352
353                 if(movsxReadAddr.empty()) {
354                     // should be a register-register movsx[d]
355                     // from either 16- or 32-bit source operand
356                     Expression::Ptr op1 = 
357                         tableLoc->second->getOperand(0).getValue(); 
358                     Expression::Ptr op2 = 
359                         tableLoc->second->getOperand(1).getValue();
360             
361                     if(op1 && op2) {
362                         int sz1 = op1->eval().size();
363                         int sz2 = op2->eval().size();
364                         parsing_printf("\t\tfound %d-byte to %d-byte move, revised stride to %d\n",
365                         
366                             sz2,sz1,sz2);
367                         tableStride = sz2;
368                     }
369                 }
370
371                 static Immediate::Ptr four(new Immediate(Result(u8, 4)));
372                 static Expression::Ptr dummy4(new DummyExpr());
373                 static Expression::Ptr dummy2(new DummyExpr());
374                 static Immediate::Ptr two(new Immediate(Result(u8, 2)));
375                 static BinaryFunction::funcT::Ptr multiplier(new BinaryFunction::multResult());
376                 static BinaryFunction::Ptr scaleCheck4(new BinaryFunction(four, dummy4, (tableStride == 8) ? u64: u32,
377                         multiplier));
378                 static BinaryFunction::Ptr scaleCheck2(new BinaryFunction(two, dummy2, (tableStride == 8) ? u64: u32,
379                         multiplier));
380                 for(std::set<Expression::Ptr>::const_iterator readExp =
381                     movsxReadAddr.begin();
382                     readExp != movsxReadAddr.end();
383                     ++readExp)
384                 {
385                     if((*readExp)->isUsed(scaleCheck4))
386                     {
387                         parsing_printf("\tFound sign-extending mov, revising table stride to scale (4)\n");
388                         tableStride = 4;
389                     }
390                     else if((*readExp)->isUsed(scaleCheck2))
391                     {
392                         parsing_printf("\tFound sign-extending mov, revising table stride to scale (2)\n");
393                         tableStride = 2;
394                     }
395                     else
396                     {
397                         parsing_printf("\tFound sign-extending mov insn %s, readExp %s\n",
398                                        tableLoc->second->format().c_str(), (*readExp)->format().c_str());
399                         parsing_printf("\tcouldn't match stride expression %s or %s--HELP!\n",
400                                        scaleCheck2->format().c_str(), scaleCheck4->format().c_str());
401                     }
402                 }
403                 break;
404             }
405         }
406     }
407     Address tableBase = getTableAddress(tableInsn, tableInsnAddr, thunkOffset);
408     return fillTableEntries(thunkOffset, tableBase,
409                             tableSize, tableStride, outEdges);
410 }
411 namespace detail
412 {
413     bool isNonCallEdge(ParseAPI::Edge* e)
414     {
415         return e->type() != CALL;
416     }
417     bool leadsToVisitedBlock(ParseAPI::Edge* e, const std::set<Block*>& visited)
418     {
419         Block* src = e->src();
420         return visited.find(src) != visited.end();
421     }
422 };
423
424
425 Address IA_IAPI::findThunkInBlock(Block* curBlock, Address& thunkOffset) const
426 {
427     const unsigned char* buf =
428             (const unsigned char*)(_isrc->getPtrToInstruction(curBlock->start()));
429     if( buf == NULL ) {
430         parsing_printf("%s[%d]: failed to get pointer to instruction by offset\n",
431                        FILE__, __LINE__);
432         return false;
433     }
434     
435     InstructionDecoder dec(buf,curBlock->size() + InstructionDecoder::maxInstructionLength,
436                            _isrc->getArch());
437     IA_IAPI * blockptr = NULL;
438     blockptr = new IA_IAPI(dec,curBlock->start(),_obj,_cr,_isrc);
439     IA_IAPI & block = *blockptr;
440
441     parsing_printf("\tchecking block at 0x%lx for thunk\n", curBlock->start());
442     while(block.getAddr() < curBlock->end())
443     {
444         if(block.getInstruction()->getCategory() == c_CallInsn)
445         {
446             parsing_printf("\tchecking call at 0x%lx for thunk\n", block.getAddr());
447             if(!block.isRealCall())
448             {
449                 parsing_printf("\tthunk found at 0x%lx, checking for add\n", block.getAddr());
450                 block.advance();
451                 thunkOffset = block.getAddr();
452                 Instruction::Ptr addInsn = block.getInstruction();
453                 if(addInsn)
454                     parsing_printf("\tinsn after thunk: %s\n", addInsn->format().c_str());
455                 else
456                     parsing_printf("\tNO INSN after thunk at 0x%lx\n", thunkOffset);
457                 if(addInsn && (addInsn->getOperation().getID() == e_add))
458                 {
459                     Result imm = addInsn->getOperand(1).getValue()->eval();
460                     Result imm2 = addInsn->getOperand(0).getValue()->eval();
461                     if(imm.defined)
462                     {
463                         Address thunkDiff = imm.convert<Address>();
464                         parsing_printf("\tsetting thunkOffset to 0x%lx (0x%lx + 0x%lx)\n",
465                                        thunkOffset+thunkDiff, thunkOffset, thunkDiff);
466                         Address ret = block.getPrevAddr();
467                         thunkOffset = thunkOffset + thunkDiff;
468                         delete blockptr;
469                         return ret;
470                     }
471                     else if(imm2.defined)
472                     {
473                         Address thunkDiff = imm2.convert<Address>();
474                         parsing_printf("\tsetting thunkOffset to 0x%lx (0x%lx + 0x%lx)\n",
475                                        thunkOffset+thunkDiff, thunkOffset, thunkDiff);
476                         Address ret = block.getPrevAddr();
477                         thunkOffset = thunkOffset + thunkDiff;
478                         delete blockptr;
479                         return ret;
480                     }
481                     else
482                     {
483                         parsing_printf("\tadd insn %s found following thunk at 0x%lx, couldn't bind operands!\n",
484                                        addInsn->format().c_str(), thunkOffset);
485                     }
486                 }
487                 thunkOffset = 0;
488             }
489         }
490         else if(block.getInstruction()->getOperation().getID() == e_lea)
491             // Look for an AMD64 IP-relative LEA.  If we find one, it should point to the start of a
492         {    // relative jump table.
493             parsing_printf("\tchecking instruction %s at 0x%lx for IP-relative LEA\n", block.getInstruction()->format().c_str(),
494                            block.getAddr());
495             Expression::Ptr IPRelAddr = block.getInstruction()->getOperand(1).getValue();
496             IPRelAddr->bind(thePC[_isrc->getArch()].get(), Result(s64, block.getNextAddr()));
497             Result iprel = IPRelAddr->eval();
498             if(iprel.defined)
499             {
500                 thunkOffset = iprel.convert<Address>();
501                 parsing_printf("\tsetting thunkOffset to 0x%lx at 0x%lx\n",thunkOffset, block.getAddr());
502                 Address ret = block.getAddr();
503                 delete blockptr;
504                 return ret;
505             }
506         }
507         block.advance();
508     }
509     delete blockptr;
510     return 0;
511 }
512
513
514 std::pair<Address, Address> IA_IAPI::findThunkAndOffset(Block* start) const
515 {
516     std::set<Block*> visited;
517     std::deque<Block*> worklist;
518     Block* curBlock;
519     worklist.insert(worklist.begin(), start);
520     visited.insert(start);
521     Address thunkOffset = 0;
522
523     // traverse only intraprocedural edges
524     Intraproc epred;
525
526     while(!worklist.empty())
527     {
528         curBlock = worklist.front();
529         worklist.pop_front();
530         // If the only thunk we can find within this function that precedes the
531         // indirect jump in control flow (we think) is after it in the address space,
532         // it may be a jump table but it's not one our heuristics should expect to
533         // handle well.  Better to bail out than try to parse something that will be garbage.
534         if(curBlock->start() > start->start()) continue;
535         Address tableInsnAddr = findThunkInBlock(curBlock, thunkOffset);
536         if(tableInsnAddr != 0)
537         {
538             return std::make_pair(tableInsnAddr, thunkOffset);
539         }
540
541         Block::edgelist::iterator sit = curBlock->sources().begin(&epred);
542         for( ; sit != curBlock->sources().end(); ++sit) {
543             ParseAPI::Edge *e = *sit;
544
545             // FIXME debugging assert
546             assert(detail::isNonCallEdge(e));
547
548             // FIXME check this algorithm... O(log n) lookup in visited
549             if(!detail::leadsToVisitedBlock(e, visited))
550             {
551                 worklist.push_back(e->src());
552                 visited.insert(e->src());
553             }
554         }
555     }
556     return std::make_pair(0, 0);
557
558 }
559
560 boost::tuple<Instruction::Ptr,
561  Instruction::Ptr,
562  bool> IA_IAPI::findMaxSwitchInsn(Block *start) const
563 {
564     std::set<Block *> visited;
565     std::vector<Block *> WL;
566     Block *curBlk;
567     int depth = 0;
568
569     bool foundMaxSwitch = false;
570     bool foundCondBranch = false;
571     Address maxSwitchAddr = 0;
572
573     WL.push_back(start);
574     Instruction::Ptr compareInsn, condBranchInsn;
575     bool compareOnTakenBranch = false;
576     for(unsigned j=0;j < WL.size(); j++)
577     {
578         curBlk = WL[j];
579         visited.insert(curBlk);
580
581         foundMaxSwitch = false;
582         foundCondBranch = false;
583         const unsigned char* buf =
584                 (const unsigned char*)(_isrc->getPtrToInstruction(curBlk->start()));
585         if( buf == NULL ) {
586             parsing_printf("%s[%d]: failed to get pointer to instruction by offset\n",
587                            FILE__, __LINE__);
588             return boost::make_tuple(Instruction::Ptr(), Instruction::Ptr(), false);
589         }
590         InstructionDecoder dec(buf, curBlk->size(), _isrc->getArch());
591         Instruction::Ptr i;
592         Address curAdr = curBlk->start();
593         while(i = dec.decode())
594         {
595             if(i->getCategory() == c_CompareInsn)
596             // check for cmp
597             {
598                 parsing_printf("\tFound jmp table cmp instruction %s at 0x%lx\n",
599                                i->format().c_str(), curAdr);
600                 compareInsn = i;
601                 maxSwitchAddr = curAdr;
602                 foundMaxSwitch = true;
603             }
604             if(i->getCategory() == c_BranchInsn &&
605                i->allowsFallThrough())
606             {
607                 parsing_printf("\tFound jmp table cond br instruction %s at 0x%lx\n",
608                                i->format().c_str(), curAdr);
609                 condBranchInsn = i;
610                 foundCondBranch = true;
611
612                 Block::edgelist::iterator tit = curBlk->targets().begin();
613                 bool taken_hit = false;
614                 bool fallthrough_hit = false;
615                 for ( ; tit != curBlk->targets().end(); ++tit) {
616                     ParseAPI::Edge *t = *tit;
617                     if (t->type() == COND_TAKEN &&
618                         (visited.find(t->trg()) != visited.end()))
619                     {
620                         taken_hit = true;
621                     }
622                     if ((t->type() == COND_NOT_TAKEN ||
623                          t->type() == FALLTHROUGH) &&
624                          (visited.find(t->trg()) != visited.end()))
625                     {
626                         fallthrough_hit = true;
627                     }
628                 }
629                 parsing_printf("\tfindMaxSwitchInsn: taken_hit: %d, fallthrough_hit: %d\n", taken_hit, fallthrough_hit);
630                 compareOnTakenBranch = taken_hit && !fallthrough_hit;
631                 break;
632             }
633             curAdr += i->size();
634         }
635
636         if(foundMaxSwitch && foundCondBranch)
637             break; // done
638
639             // look further back
640         Block::edgelist::iterator sit = curBlk->sources().begin();
641         depth++;
642             // We've seen depth 2 in libc et al
643         if(depth > 2) return boost::make_tuple(Instruction::Ptr(), Instruction::Ptr(), false);
644            
645         for( ; sit != curBlk->sources().end(); ++sit) 
646         {
647             ParseAPI::Edge * s = *sit;
648
649             // ignore return edges
650             if(s->type() == RET)
651                 continue;
652
653             if(s->type() == CALL)
654                 return boost::make_tuple(Instruction::Ptr(), Instruction::Ptr(), false);
655
656             Block * src = s->src();
657             if( (visited.find( src ) == visited.end())) {
658                 WL.push_back(src);
659             }
660         }
661     }
662     WL.clear();
663     parsing_printf("\tfindMaxSwitchInsn: table on taken branch: %d, returning: %d\n", compareOnTakenBranch, foundMaxSwitch &&
664             foundCondBranch);
665     
666     return boost::make_tuple(compareInsn, condBranchInsn, compareOnTakenBranch);
667 }
668  
669 Address IA_IAPI::getTableAddress(Instruction::Ptr tableInsn, 
670     Address tableInsnAddr,
671     Address thunkOffset) const
672 {
673     // Extract displacement from table insn
674     Expression::Ptr displacementSrc;
675     if(tableInsn->getCategory() == c_BranchInsn)
676     {
677         Expression::Ptr op = tableInsn->getOperand(0).getValue();
678         std::vector<InstructionAST::Ptr> tmp;
679         op->getChildren(tmp);
680         if(tmp.empty())
681         {
682             displacementSrc = op;
683         }
684         else
685         {
686             displacementSrc = dyn_detail::boost::dynamic_pointer_cast<Expression>(tmp[0]);
687         }
688     }
689     else
690     {
691         parsing_printf("\tcracking table instruction %s\n", tableInsn->format().c_str());
692         std::vector<InstructionAST::Ptr> tmp;
693         Expression::Ptr op = tableInsn->getOperand(1).getValue();
694         if(!op)
695         {
696             parsing_printf("\ttable insn BAD! (no second operand)\n");
697             return 0;
698         }
699         if(tableInsn->getOperation().getID() != e_lea)
700         {
701             op->getChildren(tmp);
702             if(tmp.empty())
703             {
704                 parsing_printf("\ttable insn BAD! (not LEA, second operand not a deref)\n");
705                 return 0;
706             }
707             displacementSrc = dyn_detail::boost::dynamic_pointer_cast<Expression>(tmp[0]);
708         }
709         else
710         {
711             displacementSrc = op;
712         }
713     }
714     
715     zeroAllGPRegisters z(tableInsnAddr + tableInsn->size());
716     displacementSrc->apply(&z);
717     
718     Address jumpTable = 0;
719     //if(!disp.defined)
720     if(!z.isDefined())
721     {
722         if(!thunkOffset)
723         {
724             parsing_printf("\ttable insn: %s, displacement %s, bind of all GPRs FAILED\n",
725                            tableInsn->format().c_str(), displacementSrc->format().c_str());
726             return 0;
727         }
728     }
729     else
730     {
731 //        jumpTable = disp.convert<Address>();
732         jumpTable = z.getResult();
733     }
734     parsing_printf("\tjumpTable set to 0x%lx\n",jumpTable);
735
736     if(!jumpTable && !thunkOffset)
737     {
738         return 0;
739     }
740
741     // in AMD64 rip-relative LEA case, jumpTable and thunkOffset are
742     // calculated from the same instruction (FIXME this indicates
743     // a flaw in the overall logic which is corrected here)
744     if(jumpTable != thunkOffset) {
745         jumpTable += thunkOffset;
746         parsing_printf("\tjumpTable revised to 0x%lx\n",jumpTable);
747     }
748     // On Windows, all of our other addresses have had the base address
749     // of their module subtracted off before we ever use them.  We need to fix this up
750     // to conform to that standard so that Symtab actually believes that there's code
751     // at the table's destination.
752 #if defined(os_windows)
753         jumpTable -= _obj->cs()->loadAddress();
754 #endif
755     if( !_isrc->isValidAddress(jumpTable) )
756 {
757         // If the "jump table" has a start address that is outside
758         // of the valid range of the binary, we can say with high
759         // probability that we have misinterpreted some other
760         // construct (such as a function pointer comparison & tail
761         // call, for example) as a jump table. Give up now.
762     return 0;
763 }
764     return jumpTable;
765 }
766
767 bool IA_IAPI::fillTableEntries(Address thunkOffset,
768                                Address tableBase,
769                                unsigned tableSize,
770                                unsigned tableStride,
771                                std::vector<std::pair< Address, EdgeTypeEnum> >& outEdges) const
772 {
773     if( !_isrc->isValidAddress(tableBase) )
774     {
775         parsing_printf("\ttableBase 0x%lx invalid, returning false\n", tableBase);
776         return false;
777     }
778           
779     for(unsigned int i=0; i < tableSize; i++)
780     {
781         Address tableEntry = tableBase + (i * tableStride);
782         Address jumpAddress = 0;
783                 
784         if(_isrc->isValidAddress(tableEntry))
785         {
786             if(tableStride == sizeof(Address)) {
787                                         // Unparseable jump table
788                 jumpAddress = *(const Address *)
789                     _isrc->getPtrToInstruction(tableEntry);
790                 if( 0 == jumpAddress ) {
791                     parsing_printf("%s[%d]: failed to get pointer to instruction by offset\n",
792                                    FILE__, __LINE__);
793                     return false;
794                 }
795             }
796             else {
797                 jumpAddress = *(const int *)
798                     _isrc->getPtrToInstruction(tableEntry);
799                 if( 0 == jumpAddress ) {
800                     parsing_printf("%s[%d]: failed to get pointer to "
801                         "instruction by offset\n", FILE__, __LINE__);
802                     return false;
803                 }
804             }
805         }
806 #if defined(os_windows)
807         jumpAddress -= _obj->cs()->loadAddress();
808 #endif
809         if(thunkOffset && _isrc->isCode(jumpAddress + thunkOffset))
810         {
811             // XXX We assume that if thunkOffset is set that
812             //     entries in the table are relative, but only
813             //     if the absolute address would be valid
814             jumpAddress += thunkOffset;
815         }
816         else if(!(_isrc->isCode(jumpAddress))) {
817             parsing_printf("\tentry %d [0x%lx] -> 0x%lx, invalid, skipping\n",
818                                    i, tableEntry, jumpAddress);
819                     continue;
820         }
821         parsing_printf("\tentry %d [0x%lx] -> 0x%lx\n",i,tableEntry,jumpAddress);
822                 outEdges.push_back(std::make_pair(jumpAddress, INDIRECT));
823                 
824     }
825     if(outEdges.empty()) parsing_printf("\tno valid table entries, ret false\n");
826     return !outEdges.empty();
827     
828 }
829
830
831 bool IA_IAPI::computeTableBounds(Instruction::Ptr maxSwitchInsn,
832                                  Instruction::Ptr branchInsn,
833                                  Instruction::Ptr tableInsn,
834                                  bool foundJCCAlongTaken,
835                                  unsigned& tableSize,
836                                  unsigned& tableStride) const
837 {
838     assert(maxSwitchInsn && branchInsn);
839     Result compareBound = maxSwitchInsn->getOperand(1).getValue()->eval();
840     if(!compareBound.defined) return false;
841     tableSize = compareBound.convert<unsigned>();
842     // Sanity check the bounds; 32k tables would be an oddity, and larger is almost certainly
843     // a misparse
844     static const unsigned int maxTableSize = 32768;
845     if(tableSize > maxTableSize)
846     {
847         parsing_printf("\tmaxSwitch of %d above %d, BAILING OUT\n", tableSize, maxTableSize);
848         return false;
849     }
850     if(foundJCCAlongTaken)
851     {
852         if(branchInsn->getOperation().getID() == e_jbe ||
853            branchInsn->getOperation().getID() == e_jle)
854         {
855             tableSize++;
856         }
857     }
858     else
859     {
860         if(branchInsn->getOperation().getID() == e_jnbe ||
861            branchInsn->getOperation().getID() == e_jnle)
862         {
863             tableSize++;
864         }
865     }
866     parsing_printf("\tmaxSwitch set to %d\n", tableSize);
867     tableStride = _isrc->getAddressWidth();
868     std::set<Expression::Ptr> tableInsnReadAddr;
869     tableInsn->getMemoryReadOperands(tableInsnReadAddr);
870     if(tableStride == 8)
871     {
872         static Immediate::Ptr four(new Immediate(Result(u8, 4)));
873         static BinaryFunction::funcT::Ptr multiplier(new BinaryFunction::multResult());
874         static Expression::Ptr dummy(new DummyExpr());
875         static BinaryFunction::Ptr scaleCheck(new BinaryFunction(four, dummy, u64, multiplier));
876         for(std::set<Expression::Ptr>::const_iterator curExpr = tableInsnReadAddr.begin();
877             curExpr != tableInsnReadAddr.end();
878             ++curExpr)
879         {
880             if((*curExpr)->isUsed(scaleCheck))
881             {
882                 tableSize = tableSize >> 1;
883                 parsing_printf("\tmaxSwitch revised to %d\n",tableSize);
884             }
885         }
886     }
887     return true;
888 }
889
890 bool IA_IAPI::isThunk() const {
891   // Before we go a-wandering, check the target
892     if (!_isrc->isValidAddress(getCFT()))
893     {
894         parsing_printf("... Call to 0x%lx is invalid (outside code or data)\n",
895                        getCFT());
896         return false;
897     }
898
899     const unsigned char *target =
900             (const unsigned char *)_isrc->getPtrToInstruction(getCFT());
901   // We're decoding two instructions: possible move and possible return.
902   // Check for move from the stack pointer followed by a return.
903     InstructionDecoder targetChecker(target, 
904             2*InstructionDecoder::maxInstructionLength, _isrc->getArch());
905     Instruction::Ptr thunkFirst = targetChecker.decode();
906     Instruction::Ptr thunkSecond = targetChecker.decode();
907     parsing_printf("... checking call target for thunk, insns are %s, %s\n", thunkFirst->format().c_str(),
908                    thunkSecond->format().c_str());
909     if(thunkFirst && (thunkFirst->getOperation().getID() == e_mov))
910     {
911         if(thunkFirst->isRead(stackPtr[_isrc->getArch()]))
912         {
913             parsing_printf("... checking second insn\n");
914             if(!thunkSecond) {
915                 parsing_printf("...no second insn\n");
916                 return false;
917             }
918             if(thunkSecond->getCategory() != c_ReturnInsn)
919             {
920                 parsing_printf("...insn %s not a return\n", thunkSecond->format().c_str());
921                 return false;
922             }
923             return true;
924         }
925     }
926     parsing_printf("... real call found\n");
927     return false;
928 }
929
930 bool IA_IAPI::isTailCall(Function * /*context*/,unsigned int) const
931 {
932     if(tailCall.first) {
933         parsing_printf("\tReturning cached tail call check result: %d\n", tailCall.second);
934         return tailCall.second;
935     }
936     tailCall.first = true;
937
938     if(curInsn()->getCategory() == c_BranchInsn &&
939        _obj->findFuncByEntry(_cr,getCFT()))
940     {
941         parsing_printf("\tjump to 0x%lx, TAIL CALL\n", getCFT());
942         tailCall.second = true;
943         return tailCall.second;
944     }
945
946     if(allInsns.size() < 2) {
947         tailCall.second = false;
948         parsing_printf("\ttoo few insns to detect tail call\n");
949         return tailCall.second;
950     }
951
952     if(curInsn()->getCategory() == c_BranchInsn ||
953        curInsn()->getCategory() == c_CallInsn)
954     {
955         std::map<Address, Instruction::Ptr>::const_iterator prevIter =
956                 allInsns.find(current);
957         --prevIter;
958         Instruction::Ptr prevInsn = prevIter->second;
959         if(prevInsn->getOperation().getID() == e_leave)
960         {
961             parsing_printf("\tprev insn was leave, TAIL CALL\n");
962             tailCall.second = true;
963             return tailCall.second;
964         }
965         if(prevInsn->getOperation().getID() == e_pop)
966         {
967             if(prevInsn->isWritten(framePtr[_isrc->getArch()]))
968             {
969                 parsing_printf("\tprev insn was %s, TAIL CALL\n", prevInsn->format().c_str());
970                 tailCall.second = true;
971                 return tailCall.second;
972             }
973             parsing_printf("\tprev insn was %s, not tail call\n", prevInsn->format().c_str());
974         }
975     }
976     tailCall.second = false;
977     return tailCall.second;
978 }
979
980 bool IA_IAPI::savesFP() const
981 {
982     Instruction::Ptr ci = curInsn();
983     if(ci->getOperation().getID() == e_push)
984     {
985         return(ci->isRead(framePtr[_isrc->getArch()]));
986     }
987     return false;
988 }
989
990 bool IA_IAPI::isStackFramePreamble() const
991 {
992     if(savesFP())
993     {
994         InstructionDecoder tmp(dec);
995         std::vector<Instruction::Ptr> nextTwoInsns;
996         nextTwoInsns.push_back(tmp.decode());
997         nextTwoInsns.push_back(tmp.decode());
998         if(isFrameSetupInsn(nextTwoInsns[0]) ||
999            isFrameSetupInsn(nextTwoInsns[1]))
1000         {
1001             return true;
1002         }
1003     }
1004     return false;
1005 }
1006
1007 bool IA_IAPI::cleansStack() const
1008 {
1009     Instruction::Ptr ci = curInsn();
1010     return (ci->getCategory() == c_ReturnInsn) &&
1011             ci->getOperand(0).getValue();
1012
1013 }
1014
1015 bool IA_IAPI::isReturnAddrSave() const
1016 {
1017     // not implemented on non-power
1018     return false;
1019 }
1020
1021
1022 //class ST_Predicates : public Slicer::Predicates {};
1023
1024 // returns stackTamper, which is false if parsing should not resume 
1025 // after call instructions to this function.  
1026 // The function recommends parsing at an alternative address if the stack 
1027 // delta is a known absolute or relative value, otherwise we will instrument
1028 // this function's return instructions to see if the function returns
1029 StackTamper IA_IAPI::tampersStack(ParseAPI::Function *, 
1030                                   Address &) const
1031 {
1032     return TAMPER_NONE;
1033
1034 #if 0
1035
1036     using namespace SymbolicEvaluation;
1037     if (TAMPER_UNSET != func->stackTamper()) {
1038         return func->stackTamper();
1039     }
1040
1041     if ( ! _obj->defensiveMode() ) { 
1042         return TAMPER_NONE;
1043     }
1044
1045     Function::blocklist & retblks = func->returnBlocks();
1046     if ( retblks.begin() == retblks.end() ) {
1047         return TAMPER_NONE;
1048     }
1049
1050     AssignmentConverter converter(true);
1051     vector<Assignment::Ptr> assgns;
1052     ST_Predicates preds;
1053     StackTamper tamper = TAMPER_UNSET;
1054     //Absloc stkLoc (MachRegister::getStackPointer(_isrc->getArch()));
1055     Function::blocklist::iterator bit;
1056     for (bit = retblks.begin(); retblks.end() != bit; bit++) {
1057         Address retnAddr = (*bit)->lastInsnAddr();
1058         InstructionDecoder retdec( _isrc->getPtrToInstruction( retnAddr ), 
1059                                   InstructionDecoder::maxInstructionLength, 
1060                                   _cr->getArch() );
1061         Instruction::Ptr retn = retdec.decode();
1062         converter.convert(retn, retnAddr, func, assgns);
1063         vector<Assignment::Ptr>::iterator ait;
1064         AST::Ptr sliceAtRet;
1065
1066         for (ait = assgns.begin(); assgns.end() != ait; ait++) {
1067             AbsRegion & outReg = (*ait)->out();
1068             if ( outReg.absloc().isPC() ) {
1069                 Slicer slicer(*ait,*bit,func);
1070                 Graph::Ptr slGraph = slicer.backwardSlice(preds);
1071                 SymEval::Result_t slRes;
1072                 SymEval::expand(slGraph,slRes);
1073                 if (dyn_debug_malware) {
1074                     stringstream graphDump;
1075                     graphDump << "sliceDump_" << func->name() 
1076                               << "_" << retnAddr << ".dot";
1077                     slGraph->printDOT(graphDump.str());
1078                 }
1079                 sliceAtRet = slRes[*ait];
1080                 break;
1081             }
1082         }
1083         assert(sliceAtRet != NULL);
1084         StackTamperVisitor vis((*ait)->out());
1085         tamper = vis.tampersStack(sliceAtRet, tamperAddr);
1086         assgns.clear();
1087     }
1088     return tamper;
1089 #endif
1090 }
1091
1092 /* returns true if the call leads to:
1093  * -an invalid instruction (or immediately branches/calls to an invalid insn)
1094  * -a block not ending in a return instruction that pops the return address 
1095  *  off of the stack
1096  */
1097 bool IA_IAPI::isFakeCall() const
1098 {
1099     assert(_obj->defensiveMode());
1100
1101     // get instruction at entry of new func
1102     bool tampers = false;
1103     Address entry = getCFT();
1104     if ( ! _cr->contains(entry) || ! _isrc->isCode(entry) ) {
1105         mal_printf("WARNING: found call to function at %lx that "
1106                 "redirects to invalid address %lx %s[%d]\n", current, 
1107                 entry, FILE__,__LINE__);
1108         return false;
1109     }
1110     const unsigned char* bufPtr =
1111      (const unsigned char *)(_isrc->getPtrToInstruction(entry));
1112     InstructionDecoder newdec( bufPtr,
1113                               _isrc->offset() + _isrc->length() - entry,
1114                               _cr->getArch() );
1115     IA_IAPI ah(newdec, entry, _obj, _cr, _isrc);
1116     Instruction::Ptr insn = ah.curInsn();
1117
1118     // follow ctrl transfers until you get a block containing non-ctrl 
1119     // transfer instructions, or hit a return instruction
1120     while (insn->getCategory() == c_CallInsn ||
1121            insn->getCategory() == c_BranchInsn) 
1122     {
1123         Address entry = ah.getCFT();
1124         if ( ! _cr->contains(entry) || ! _isrc->isCode(entry) ) {
1125             mal_printf("WARNING: found call to function at %lx that "
1126                     "redirects to invalid address %lx %s[%d]\n", current, 
1127                     entry, FILE__,__LINE__);
1128             return false;
1129         }
1130         bufPtr = (const unsigned char *)(_isrc->getPtrToInstruction(entry));
1131         newdec = InstructionDecoder(bufPtr, 
1132                                     _isrc->offset() + _isrc->length() - entry, 
1133                                     _cr->getArch());
1134         ah = IA_IAPI(newdec, entry, _obj, _cr, _isrc);
1135         insn = ah.curInsn();
1136     }
1137
1138     // calculate instruction stack deltas for the block, leaving the iterator
1139     // at the last ins'n if it's a control transfer, or after calculating the 
1140     // last instruction's delta if we run off the end of initialized memory
1141     int stackDelta = 0;
1142     int addrWidth = _isrc->getAddressWidth();
1143     static Expression::Ptr theStackPtr
1144         (new RegisterAST(MachRegister::getStackPointer(_isrc->getArch())));
1145     Address curAddr = entry;
1146
1147     while(true) {
1148
1149         // exit condition 1
1150         if (insn->getCategory() == c_CallInsn ||
1151             insn->getCategory() == c_ReturnInsn ||
1152             insn->getCategory() == c_BranchInsn) 
1153         {
1154             break;
1155         }
1156
1157         // calculate instruction delta
1158         if(insn->isWritten(theStackPtr)) {
1159             entryID what = insn->getOperation().getID();
1160             int sign = 1;
1161             switch(what) 
1162             {
1163             case e_push:
1164                 sign = -1;
1165             case e_pop: {
1166                 Operand arg = insn->getOperand(0);
1167                 if (arg.getValue()->eval().defined) {
1168                     stackDelta += sign * addrWidth;
1169                 } else {
1170                     assert(0);
1171                 }
1172                 break;
1173             }
1174             case e_pusha:
1175             case e_pushad:
1176                 sign = -1;
1177             case e_popa:
1178             case e_popad:
1179                 stackDelta += sign * 8 * addrWidth;
1180                 break;
1181
1182             case e_pushf:
1183             case e_pushfd:
1184                 sign = -1;
1185             case e_popf:
1186             case e_popfd:
1187                 stackDelta += sign * 4;
1188                 break;
1189
1190             case e_leave:
1191             case e_enter:
1192                 fprintf(stderr, "WARNING: saw leave or enter instruction "
1193                         "at %lx that is not handled by isFakeCall %s[%d]\n",
1194                         curAddr, FILE__,__LINE__);//KEVINTODO: unhandled case
1195             default:
1196                 assert(0);//what stack-writing instruction is this?
1197             }
1198         }
1199
1200         if (stackDelta > 0) {
1201             tampers=true;
1202         }
1203
1204         // exit condition 2
1205         ah.advance();
1206         Instruction::Ptr next = ah.curInsn();
1207         if (NULL == next) {
1208             break;
1209         }
1210         curAddr += insn->size();
1211         insn = next;
1212     } 
1213
1214     // not a fake call if it ends w/ a return instruction
1215     if (insn->getCategory() == c_ReturnInsn) {
1216         return false;
1217     }
1218
1219     // if the stack delta is positive or the return address has been replaced
1220     // with an absolute value, it's a fake call, since in both cases 
1221     // the return address is gone and we cannot return to the caller
1222     if ( 0 < stackDelta || tampers ) {
1223         return true;
1224     }
1225
1226     return tampers;
1227 }
1228
1229 bool IA_IAPI::isIATcall() const
1230 {
1231     if (!isDynamicCall()) {
1232         return false;
1233     }
1234
1235     if (!curInsn()->readsMemory()) {
1236         return false;
1237     }
1238
1239     std::set<Expression::Ptr> memReads;
1240     curInsn()->getMemoryReadOperands(memReads);
1241     if (memReads.size() != 1) {
1242         return false;
1243     }
1244
1245     Result memref = (*memReads.begin())->eval();
1246     if (!memref.defined) {
1247         return false;
1248     }
1249     Address entryAddr = memref.convert<Address>();
1250
1251     // convert to a relative address
1252     if (_obj->cs()->loadAddress() < entryAddr) {
1253         entryAddr -= _obj->cs()->loadAddress();
1254     }
1255     
1256     if (!_obj->cs()->isValidAddress(entryAddr)) {
1257         return false;
1258     }
1259
1260     // calculate the address of the ASCII string pointer, 
1261     // skip over the IAT entry's two-byte hint
1262     Address funcAsciiAddr = 2 + *(Address*) (_obj->cs()->getPtrToData(entryAddr));
1263     if (!_obj->cs()->isValidAddress(funcAsciiAddr)) {
1264         return false;
1265     }
1266
1267     // see if it's really a string that could be a function name
1268     char *funcAsciiPtr = (char*) _obj->cs()->getPtrToData(funcAsciiAddr);
1269     char cur = 'a';
1270     int count=0;
1271     do {
1272         cur = funcAsciiPtr[count];
1273         count++;
1274     }while (count < 100 && 
1275             _obj->cs()->isValidAddress(funcAsciiAddr+count) &&
1276             ((cur >= 'A' && cur <= 'z') ||
1277              (cur >= '0' && cur <= '9')));
1278     if (cur != 0 || count <= 1) 
1279         return false;
1280
1281     return true;
1282 }