Slight optimization: statically construct stack pointer/frame pointer/PC ASTs.
[dyninst.git] / dyninstAPI / src / IA_x86.C
1 /*
2  * Copyright (c) 1996-2009 Barton P. Miller
3  * 
4  * We provide the Paradyn Parallel Performance Tools (below
5  * described as "Paradyn") on an AS IS basis, and do not warrant its
6  * validity or performance.  We reserve the right to update, modify,
7  * or discontinue this software at any time.  We shall have no
8  * obligation to supply such updates or modifications or any other
9  * form of support to you.
10  * 
11  * By your use of Paradyn, you understand and agree that we (or any
12  * other person or entity with proprietary rights in Paradyn) are
13  * under no obligation to provide either maintenance services,
14  * update services, notices of latent defects, or correction of
15  * defects for Paradyn.
16  * 
17  * This library is free software; you can redistribute it and/or
18  * modify it under the terms of the GNU Lesser General Public
19  * License as published by the Free Software Foundation; either
20  * version 2.1 of the License, or (at your option) any later version.
21  * 
22  * This library is distributed in the hope that it will be useful,
23  * but WITHOUT ANY WARRANTY; without even the implied warranty of
24  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
25  * Lesser General Public License for more details.
26  * 
27  * You should have received a copy of the GNU Lesser General Public
28  * License along with this library; if not, write to the Free Software
29  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
30  */
31
32
33 #include "IA_IAPI.h"
34
35 #include "Register.h"
36 #include "Dereference.h"
37 #include "Immediate.h"
38 #include "BinaryFunction.h"
39 #include "debug.h"
40 #include "symtab.h"
41
42
43 #include <deque>
44
45 using namespace Dyninst;
46 using namespace InstructionAPI;
47
48 class zeroAllRegisters : public InstructionAPI::Visitor
49 {
50     public:
51     zeroAllRegisters() : defined(true) {}
52     virtual ~zeroAllRegisters() {}
53     bool defined;
54     std::deque<long> results;
55     long getResult() {
56         if(results.empty()) return 0;
57         return results.front();
58     }
59     bool isDefined() {
60         return defined && (results.size() == 1);
61     }
62     virtual void visit(BinaryFunction* b)
63     {
64         if(!defined) return;
65         long arg1 = results.back();
66         results.pop_back();
67         long arg2 = results.back();
68         results.pop_back();
69         if(b->isAdd())
70         {
71             results.push_back(arg1+arg2);
72         }
73         else if(b->isMultiply())
74         {
75             results.push_back(arg1*arg2);
76         }
77         else
78         {
79             defined = false;
80         }
81     }
82     virtual void visit(Immediate* i)
83     {
84         if(!defined) return;
85         results.push_back(i->eval().convert<long>());
86     }
87     virtual void visit(RegisterAST* )
88     {
89         if(!defined) return;
90         results.push_back(0);
91     }
92     virtual void visit(Dereference* )
93     {
94         defined = false;
95     }
96     
97 };
98
99
100 bool IA_IAPI::isFrameSetupInsn(Instruction::Ptr i) const
101 {
102     if(i->getOperation().getID() == e_mov)
103     {
104         if(i->isRead(stackPtr[img->getArch()]) &&
105            i->isWritten(framePtr[img->getArch()]))
106         {
107             return true;
108         }
109     }
110     return false;
111 }
112
113 bool IA_IAPI::isNop() const
114 {
115     // TODO: add LEA no-ops
116     assert(curInsn());
117     if(curInsn()->getOperation().getID() == e_nop)
118         return true;
119     if(curInsn()->getOperation().getID() == e_lea)
120     {
121         std::set<Expression::Ptr> memReadAddr;
122         curInsn()->getMemoryReadOperands(memReadAddr);
123         std::set<RegisterAST::Ptr> writtenRegs;
124         curInsn()->getWriteSet(writtenRegs);
125         
126         if(memReadAddr.size() == 1 && writtenRegs.size() == 1)
127         {
128             if(**(memReadAddr.begin()) == **(writtenRegs.begin()))
129             {
130                 return true;
131             }
132             // TODO: check for zero displacement--do we want to bind here?
133         }
134     }
135     return false;
136 }
137
138 bool IA_IAPI::isMovAPSTable(std::vector<std::pair< Address, EdgeTypeEnum > >& outEdges) const
139 {
140     /**
141     * AMD-64 gcc emits a re-accuring idiom of:
142      *         jmpq   *%r8
143      *         movaps %xmm7,-0xf(%rax)
144      *         movaps %xmm6,-0x1f(%rax)
145      *         movaps %xmm5,-0x2f(%rax)
146      *         movaps %xmm4,-0x3f(%rax)
147      *         movaps %xmm3,-0x4f(%rax)
148      *         movaps %xmm2,-0x5f(%rax)
149      *         movaps %xmm1,-0x6f(%rax)
150      *         movaps %xmm0,-0x7f(%rax)
151      *         <other>
152      *
153      * The jump register is calculated in such a way that it'll be difficult
154      * for our analysis to figure it out.  Instead we'll recognize the pattern
155      * of the 'movaps'.  Note that the following instruction is also a valid jump
156      * target
157      **/
158     parsing_printf("\tChecking for movaps table at 0x%lx...\n", current);
159     std::set<Address> found;
160     const unsigned char *bufferBegin =
161             (const unsigned char *)img->getPtrToInstruction(current, context);
162     if( bufferBegin == NULL ) {
163         parsing_printf("%s[%d]: failed to get pointer to instruction by offset\n",
164                        FILE__, __LINE__);
165         return false;
166     }
167
168     unsigned int size = (img->imageOffset() + img->imageLength()) - current;
169     InstructionDecoder d(bufferBegin, size, img->getArch());
170     Address cur = current;
171     unsigned last_insn_size = 0;
172     InstructionAPI::Instruction::Ptr i = d.decode();
173     cur += i->size();
174     for (;;) {
175         InstructionAPI::Instruction::Ptr insn = d.decode();
176         //All insns in sequence are movaps
177         parsing_printf("\t\tChecking instruction %s\n", insn->format().c_str());
178         if (insn->getOperation().getID() != e_movapd &&
179             insn->getOperation().getID() != e_movaps)
180         {
181             break;
182         }
183         //All insns are same size
184         if (last_insn_size == 0)
185             last_insn_size = insn->size();
186         else if (last_insn_size != insn->size())
187             break;
188
189         found.insert(cur);
190
191         cur += insn->size();
192     }
193     if (found.size() == 8) {
194         found.insert(cur);
195         //It's a match
196         for (std::set<Address>::iterator i=found.begin(); i != found.end(); i++) {
197             outEdges.push_back(std::make_pair(*i, ET_INDIR));
198         }
199         parsing_printf("\tfound\n");
200         return true;
201     }
202     parsing_printf("\tnot found (%d insns)\n", found.size());
203     return false;
204 }
205
206 bool IA_IAPI::isTableInsn(Instruction::Ptr i) const
207 {
208     if(i->getOperation().getID() == e_mov && i->readsMemory() && !i->writesMemory())
209     {
210         return true;
211     }
212     if(i->getOperation().getID() == e_lea)
213     {
214         return true;
215     }
216     return false;
217 }
218         
219 std::map<Address, Instruction::Ptr>::const_iterator IA_IAPI::findTableInsn() const
220 {
221     // Check whether the jump is our table insn!
222     Expression::Ptr cft = curInsn()->getControlFlowTarget();
223     if(cft)
224     {
225         std::vector<InstructionAST::Ptr> tmp;
226         cft->getChildren(tmp);
227         if(tmp.size() == 1)
228         {
229             Expression::Ptr cftAddr = dyn_detail::boost::dynamic_pointer_cast<Expression>(tmp[0]);
230             zeroAllRegisters z;
231             cftAddr->apply(&z);
232             parsing_printf("\tChecking indirect jump %s for table insn\n", curInsn()->format().c_str());
233             if(z.isDefined() && z.getResult())
234             {
235                 parsing_printf("\tAddress in jump\n");
236                 return allInsns.find(current);
237             }
238         }
239     }
240     std::map<Address, Instruction::Ptr>::const_iterator c =
241             allInsns.find(current);
242     while(!isTableInsn(c->second) && c != allInsns.begin())
243     {
244         --c;
245     }
246     if(isTableInsn(c->second))
247     {
248         return c;
249     }
250     return allInsns.end();
251 }
252         
253 // This should only be called on a known indirect branch...
254 bool IA_IAPI::parseJumpTable(image_basicBlock* currBlk,
255                              std::vector<std::pair< Address, EdgeTypeEnum> >& outEdges) const
256 {
257     if(isIPRelativeBranch())
258     {
259         return false;
260     }
261     std::vector<image_edge*> sourceEdges;
262     currBlk->getSources(sourceEdges);
263     if(sourceEdges.empty())
264     {
265         return false;
266     }
267     if(isMovAPSTable(outEdges))
268     {
269         return true;
270     }
271     bool foundJCCAlongTaken = false;
272     Instruction::Ptr tableInsn;
273     Address tableInsnAddr;
274     std::map<Address, Instruction::Ptr>::const_iterator tableLoc = findTableInsn();
275     if(tableLoc == allInsns.end())
276     {
277         parsing_printf("\tunable to find table insn\n");
278         return false;
279     }
280     tableInsnAddr = tableLoc->first;
281     tableInsn = tableLoc->second;
282     Instruction::Ptr maxSwitchInsn, branchInsn;
283     boost::tie(maxSwitchInsn, branchInsn, foundJCCAlongTaken) = findMaxSwitchInsn(currBlk);
284     if(!maxSwitchInsn || !branchInsn)
285     {
286         parsing_printf("\tunable to fix max switch size\n");
287         return false;
288     }
289     Address thunkOffset;
290     Address thunkInsnAddr;
291     boost::tie(thunkInsnAddr, thunkOffset) = findThunkAndOffset(currBlk);
292     if(thunkInsnAddr != 0)
293     {
294         std::map<Address, Instruction::Ptr>::const_iterator thunkLoc =
295                 allInsns.find(thunkInsnAddr);
296         if(thunkLoc != allInsns.end())
297         {
298             tableLoc = thunkLoc;
299             tableInsnAddr = thunkInsnAddr;
300             tableInsn = thunkLoc->second;
301         }
302     }
303     parsing_printf("\ttableInsn %s at 0x%lx\n",tableInsn->format().c_str(), tableInsnAddr);
304     if(thunkOffset) {
305         parsing_printf("\tThunk-calculated table base address: 0x%lx\n",
306                        thunkOffset);
307     }
308     unsigned tableSize = 0, tableStride = 0;
309     bool ok = computeTableBounds(maxSwitchInsn, branchInsn, tableInsn, foundJCCAlongTaken,
310                                  tableSize, tableStride);
311     if(!ok)
312     {
313         return false;
314     }
315     std::map<Address, Instruction::Ptr>::const_iterator cur = allInsns.find(current);
316     
317     while(tableLoc != cur)
318     {
319         tableLoc++;
320         if(tableLoc->second->getOperation().getID() == e_lea)
321         {
322             parsing_printf("\tchecking instruction %s at 0x%lx for IP-relative LEA\n", tableLoc->second->format().c_str(),
323                            tableLoc->first);
324             Expression::Ptr IPRelAddr = tableLoc->second->getOperand(1).getValue();
325             IPRelAddr->bind(thePC[img->getArch()].get(), Result(s64, tableLoc->first + tableLoc->second->size()));
326             Result iprel = IPRelAddr->eval();
327             if(iprel.defined)
328             {
329                 parsing_printf("\trevising tableInsn to %s at 0x%lx\n",tableLoc->second->format().c_str(), tableLoc->first);
330                 tableInsn = tableLoc->second;
331             }
332
333         }
334         else
335         {
336             parsing_printf("\tChecking for sign-extending mov at 0x%lx...\n", tableLoc->first);
337             if(tableLoc->second->getOperation().getID() == e_movsxd ||
338                tableLoc->second->getOperation().getID() == e_movsx)
339             {
340                 std::set<Expression::Ptr> movsxReadAddr;
341                 tableLoc->second->getMemoryReadOperands(movsxReadAddr);
342                 static Immediate::Ptr four(new Immediate(Result(u8, 4)));
343                 static Expression::Ptr dummy4(new DummyExpr());
344                 static Expression::Ptr dummy2(new DummyExpr());
345                 static Immediate::Ptr two(new Immediate(Result(u8, 2)));
346                 static BinaryFunction::funcT::Ptr multiplier(new BinaryFunction::multResult());
347                 static BinaryFunction::Ptr scaleCheck4(new BinaryFunction(four, dummy4, (tableStride == 8) ? u64: u32,
348                         multiplier));
349                 static BinaryFunction::Ptr scaleCheck2(new BinaryFunction(two, dummy2, (tableStride == 8) ? u64: u32,
350                         multiplier));
351                 for(std::set<Expression::Ptr>::const_iterator readExp =
352                     movsxReadAddr.begin();
353                     readExp != movsxReadAddr.end();
354                     ++readExp)
355                 {
356                     if((*readExp)->isUsed(scaleCheck4))
357                     {
358                         parsing_printf("\tFound sign-extending mov, revising table stride to scale (4)\n");
359                         tableStride = 4;
360                     }
361                     else if((*readExp)->isUsed(scaleCheck2))
362                     {
363                         parsing_printf("\tFound sign-extending mov, revising table stride to scale (2)\n");
364                         tableStride = 2;
365                     }
366                     else
367                     {
368                         parsing_printf("\tFound sign-extending mov insn %s, readExp %s\n",
369                                        tableLoc->second->format().c_str(), (*readExp)->format().c_str());
370                         parsing_printf("\tcouldn't match stride expression %s or %s--HELP!\n",
371                                        scaleCheck2->format().c_str(), scaleCheck4->format().c_str());
372                     }
373                 }
374                 break;
375             }
376         }
377     }
378     Address tableBase = getTableAddress(tableInsn, thunkOffset);
379     return fillTableEntries(thunkOffset, tableBase,
380                             tableSize, tableStride, outEdges);
381 }
382 namespace detail
383 {
384     bool isNonCallEdge(image_edge* e)
385     {
386         return e->getType() != ET_CALL;
387     }
388     bool leadsToVisitedBlock(image_edge* e, const std::set<image_basicBlock*>& visited)
389     {
390         image_basicBlock* src = e->getSource();
391         return visited.find(src) != visited.end();
392     }
393 };
394
395
396 Address IA_IAPI::findThunkInBlock(image_basicBlock* curBlock, Address& thunkOffset) const
397 {
398     const unsigned char* buf =
399             (const unsigned char*)(img->getPtrToInstruction(curBlock->firstInsnOffset(), context));
400     if( buf == NULL ) {
401         parsing_printf("%s[%d]: failed to get pointer to instruction by offset\n",
402                        FILE__, __LINE__);
403         return false;
404     }
405     
406     InstructionDecoder dec(buf, 
407                            curBlock->getSize() + InstructionDecoder::maxInstructionLength, 
408                            img->getArch());
409     IA_IAPI * blockptr = NULL;
410     if(context)
411         blockptr = new IA_IAPI(dec,curBlock->firstInsnOffset(),context);
412     else
413         blockptr = new IA_IAPI(dec,curBlock->firstInsnOffset(),img);
414     IA_IAPI & block = *blockptr;
415
416     parsing_printf("\tchecking block at 0x%lx for thunk\n", curBlock->firstInsnOffset());
417     while(block.getAddr() < curBlock->endOffset())
418     {
419         if(block.getInstruction()->getCategory() == c_CallInsn)
420         {
421             parsing_printf("\tchecking call at 0x%lx for thunk\n", block.getAddr());
422             if(!block.isRealCall())
423             {
424                 parsing_printf("\tthunk found at 0x%lx, checking for add\n", block.getAddr());
425                 block.advance();
426                 thunkOffset = block.getAddr();
427                 Instruction::Ptr addInsn = block.getInstruction();
428                 if(addInsn)
429                     parsing_printf("\tinsn after thunk: %s\n", addInsn->format().c_str());
430                 else
431                     parsing_printf("\tNO INSN after thunk at 0x%lx\n", thunkOffset);
432                 if(addInsn && (addInsn->getOperation().getID() == e_add))
433                 {
434                     Result imm = addInsn->getOperand(1).getValue()->eval();
435                     Result imm2 = addInsn->getOperand(0).getValue()->eval();
436                     if(imm.defined)
437                     {
438                         Address thunkDiff = imm.convert<Address>();
439                         parsing_printf("\tsetting thunkOffset to 0x%lx (0x%lx + 0x%lx)\n",
440                                        thunkOffset+thunkDiff, thunkOffset, thunkDiff);
441                         Address ret = block.getPrevAddr();
442                         thunkOffset = thunkOffset + thunkDiff;
443                         delete blockptr;
444                         return ret;
445                     }
446                     else if(imm2.defined)
447                     {
448                         Address thunkDiff = imm2.convert<Address>();
449                         parsing_printf("\tsetting thunkOffset to 0x%lx (0x%lx + 0x%lx)\n",
450                                        thunkOffset+thunkDiff, thunkOffset, thunkDiff);
451                         Address ret = block.getPrevAddr();
452                         thunkOffset = thunkOffset + thunkDiff;
453                         delete blockptr;
454                         return ret;
455                     }
456                     else
457                     {
458                         parsing_printf("\tadd insn %s found following thunk at 0x%lx, couldn't bind operands!\n",
459                                        addInsn->format().c_str(), thunkOffset);
460                     }
461                 }
462                 thunkOffset = 0;
463             }
464         }
465         else if(block.getInstruction()->getOperation().getID() == e_lea)
466             // Look for an AMD64 IP-relative LEA.  If we find one, it should point to the start of a
467         {    // relative jump table.
468             parsing_printf("\tchecking instruction %s at 0x%lx for IP-relative LEA\n", block.getInstruction()->format().c_str(),
469                            block.getAddr());
470             Expression::Ptr IPRelAddr = block.getInstruction()->getOperand(1).getValue();
471             IPRelAddr->bind(thePC[img->getArch()].get(), Result(s64, block.getNextAddr()));
472             Result iprel = IPRelAddr->eval();
473             if(iprel.defined)
474             {
475                 thunkOffset = iprel.convert<Address>();
476                 parsing_printf("\tsetting thunkOffset to 0x%lx at 0x%lx\n",thunkOffset, block.getAddr());
477                 Address ret = block.getAddr();
478                 delete blockptr;
479                 return ret;
480             }
481         }
482         block.advance();
483     }
484     delete blockptr;
485     return 0;
486 }
487
488
489 std::pair<Address, Address> IA_IAPI::findThunkAndOffset(image_basicBlock* start) const
490 {
491     std::set<image_basicBlock*> visited;
492     std::deque<image_basicBlock*> worklist;
493     pdvector<image_edge*> sources;
494     image_basicBlock* curBlock;
495     worklist.insert(worklist.begin(), start);
496     visited.insert(start);
497     Address thunkOffset = 0;
498     while(!worklist.empty())
499     {
500         curBlock = worklist.front();
501         worklist.pop_front();
502         // If the only thunk we can find within this function that precedes the
503         // indirect jump in control flow (we think) is after it in the address space,
504         // it may be a jump table but it's not one our heuristics should expect to
505         // handle well.  Better to bail out than try to parse something that will be garbage.
506         if(curBlock->firstInsnOffset() > start->firstInsnOffset()) continue;
507         Address tableInsnAddr = findThunkInBlock(curBlock, thunkOffset);
508         if(tableInsnAddr != 0)
509         {
510             return std::make_pair(tableInsnAddr, thunkOffset);
511         }
512
513         sources.clear();
514         curBlock->getSources(sources);
515         for (unsigned i=0; i<sources.size(); i++) {
516             image_edge *e = sources[i];
517             if(detail::isNonCallEdge(e) && !detail::leadsToVisitedBlock(e, visited))
518             {
519                 worklist.push_back(e->getSource());
520                 visited.insert(e->getSource());
521             }
522         }
523     }
524     return std::make_pair(0, 0);
525
526 }
527
528 boost::tuple<Instruction::Ptr,
529  Instruction::Ptr,
530  bool> IA_IAPI::findMaxSwitchInsn(image_basicBlock *start) const
531 {
532     std::set<image_basicBlock *> visited;
533     std::vector<image_basicBlock *> WL;
534     std::vector<image_edge *> sources;
535     std::vector<image_edge *> targets;
536     image_basicBlock *curBlk;
537     int depth = 0;
538
539     bool foundMaxSwitch = false;
540     bool foundCondBranch = false;
541     Address maxSwitchAddr = 0;
542
543     WL.push_back(start);
544     Instruction::Ptr compareInsn, condBranchInsn;
545     bool compareOnTakenBranch = false;
546     for(unsigned j=0;j < WL.size(); j++)
547     {
548         curBlk = WL[j];
549         visited.insert(curBlk);
550
551         foundMaxSwitch = false;
552         foundCondBranch = false;
553         const unsigned char* buf =
554                 (const unsigned char*)(img->getPtrToInstruction(curBlk->firstInsnOffset(), context));
555         if( buf == NULL ) {
556             parsing_printf("%s[%d]: failed to get pointer to instruction by offset\n",
557                            FILE__, __LINE__);
558             return boost::make_tuple(Instruction::Ptr(), Instruction::Ptr(), false);
559         }
560         InstructionDecoder dec(buf, curBlk->getSize(), img->getArch());
561         Instruction::Ptr i;
562         Address curAdr = curBlk->firstInsnOffset();
563         while(i = dec.decode())
564         {
565             if(i->getCategory() == c_CompareInsn)
566             // check for cmp
567             {
568                 parsing_printf("\tFound jmp table cmp instruction %s at 0x%lx\n",
569                                i->format().c_str(), curAdr);
570                 compareInsn = i;
571                 maxSwitchAddr = curAdr;
572                 foundMaxSwitch = true;
573             }
574             if(i->getCategory() == c_BranchInsn &&
575                i->allowsFallThrough())
576             {
577                 parsing_printf("\tFound jmp table cond br instruction %s at 0x%lx\n",
578                                i->format().c_str(), curAdr);
579                 condBranchInsn = i;
580                 foundCondBranch = true;
581
582                 targets.clear();
583                 curBlk->getTargets(targets);
584                 bool taken_hit = false;
585                 bool fallthrough_hit = false;
586                 for (unsigned i=0; i<targets.size(); i++) {
587                     if (targets[i]->getType() == ET_COND_TAKEN &&
588                         (visited.find(targets[i]->getTarget()) != visited.end()))
589                     {
590                         taken_hit = true;
591                     }
592                     if ((targets[i]->getType() == ET_COND_NOT_TAKEN ||
593                          targets[i]->getType() == ET_FALLTHROUGH) &&
594                          (visited.find(targets[i]->getTarget()) != visited.end()))
595                     {
596                         fallthrough_hit = true;
597                     }
598                 }
599                 parsing_printf("\tfindMaxSwitchInsn: taken_hit: %d, fallthrough_hit: %d\n", taken_hit, fallthrough_hit);
600                 compareOnTakenBranch = taken_hit && !fallthrough_hit;
601                 break;
602             }
603             curAdr += i->size();
604         }
605
606         if(foundMaxSwitch && foundCondBranch)
607             break; // done
608
609             // look further back
610         sources.clear();
611         curBlk->getSources( sources );
612         depth++;
613             // We've seen depth 2 in libc et al
614         if(depth > 2) return boost::make_tuple(Instruction::Ptr(), Instruction::Ptr(), false);
615             
616         for(unsigned i=0;i<sources.size();i++)
617         {
618             if(sources[i]->getType() == ET_CALL)
619                 return boost::make_tuple(Instruction::Ptr(), Instruction::Ptr(), false);
620
621             image_basicBlock * src = sources[i]->getSource();
622             if( (visited.find( src ) == visited.end())) {
623                 WL.push_back(src);
624             }
625         }
626     }
627     WL.clear();
628     parsing_printf("\tfindMaxSwitchInsn: table on taken branch: %d, returning: %d\n", compareOnTakenBranch, foundMaxSwitch &&
629             foundCondBranch);
630     
631     return boost::make_tuple(compareInsn, condBranchInsn, compareOnTakenBranch);
632 }
633  
634 Address IA_IAPI::getTableAddress(Instruction::Ptr tableInsn, Address thunkOffset) const
635 {
636     // Extract displacement from table insn
637     Expression::Ptr displacementSrc;
638     if(tableInsn->getCategory() == c_BranchInsn)
639     {
640         Expression::Ptr op = tableInsn->getOperand(0).getValue();
641         std::vector<InstructionAST::Ptr> tmp;
642         op->getChildren(tmp);
643         if(tmp.empty())
644         {
645             displacementSrc = op;
646         }
647         else
648         {
649             displacementSrc = dyn_detail::boost::dynamic_pointer_cast<Expression>(tmp[0]);
650         }
651     }
652     else
653     {
654         parsing_printf("\tcracking table instruction %s\n", tableInsn->format().c_str());
655         std::vector<InstructionAST::Ptr> tmp;
656         Expression::Ptr op = tableInsn->getOperand(1).getValue();
657         if(!op)
658         {
659             parsing_printf("\ttable insn BAD! (no second operand)\n");
660             return 0;
661         }
662         if(tableInsn->getOperation().getID() != e_lea)
663         {
664             op->getChildren(tmp);
665             if(tmp.empty())
666             {
667                 parsing_printf("\ttable insn BAD! (not LEA, second operand not a deref)\n");
668                 return 0;
669             }
670             displacementSrc = dyn_detail::boost::dynamic_pointer_cast<Expression>(tmp[0]);
671         }
672         else
673         {
674             displacementSrc = op;
675         }
676     }
677     
678     zeroAllRegisters z;
679     displacementSrc->apply(&z);
680     
681     Address jumpTable = 0;
682     //if(!disp.defined)
683     if(!z.isDefined())
684     {
685         if(!thunkOffset)
686         {
687             parsing_printf("\ttable insn: %s, displacement %s, bind of all GPRs FAILED\n",
688                            tableInsn->format().c_str(), displacementSrc->format().c_str());
689             return 0;
690         }
691     }
692     else
693     {
694 //        jumpTable = disp.convert<Address>();
695         jumpTable = z.getResult();
696     }
697     parsing_printf("\tjumpTable set to 0x%lx\n",jumpTable);
698
699     if(!jumpTable && !thunkOffset)
700     {
701         return 0;
702     }
703     jumpTable += thunkOffset;
704     parsing_printf("\tjumpTable revised to 0x%lx\n",jumpTable);
705     // On Windows, all of our other addresses have had the base address
706     // of their module subtracted off before we ever use them.  We need to fix this up
707     // to conform to that standard so that Symtab actually believes that there's code
708     // at the table's destination.
709 #if defined(os_windows)
710     jumpTable -= img->getObject()->getLoadOffset();
711 #endif
712     if( !img->isValidAddress(jumpTable) )
713 {
714         // If the "jump table" has a start address that is outside
715         // of the valid range of the binary, we can say with high
716         // probability that we have misinterpreted some other
717         // construct (such as a function pointer comparison & tail
718         // call, for example) as a jump table. Give up now.
719     return 0;
720 }
721     return jumpTable;
722 }
723
724 bool IA_IAPI::fillTableEntries(Address thunkOffset,
725                                Address tableBase,
726                                unsigned tableSize,
727                                unsigned tableStride,
728                                std::vector<std::pair< Address, EdgeTypeEnum> >& outEdges) const
729 {
730     if( !img->isValidAddress(tableBase) )
731     {
732         parsing_printf("\ttableBase 0x%lx invalid, returning false\n", tableBase);
733         return false;
734     }
735     Region* tableRegion = img->getObject()->findEnclosingRegion(current);
736     if(!tableRegion) 
737     {
738       parsing_printf("\tERROR: no region associated with table address, bailing out\n");
739       return false;
740     }
741           
742     for(unsigned int i=0; i < tableSize; i++)
743     {
744         Address tableEntry = tableBase + (i * tableStride);
745         Address jumpAddress = 0;
746                 
747         if(img->isValidAddress(tableEntry))
748         {
749             if(tableStride == sizeof(Address)) {
750                                         // Unparseable jump table
751                 jumpAddress = *(const Address *)img->getPtrToInstruction(tableEntry, context);
752                 if( jumpAddress == NULL ) {
753                     parsing_printf("%s[%d]: failed to get pointer to instruction by offset\n",
754                                    FILE__, __LINE__);
755                     return false;
756                 }
757             }
758             else {
759                 jumpAddress = *(const int *)img->getPtrToInstruction(tableEntry, context);
760                 if( jumpAddress == NULL ) {
761                     parsing_printf("%s[%d]: failed to get pointer to instruction by offset\n",
762                                    FILE__, __LINE__);
763                     return false;
764                 }
765             }
766         }
767 #if defined(os_windows)
768                 if(img)
769 {
770     jumpAddress -= img->getObject()->getLoadOffset();
771 }
772 #endif
773                 if(thunkOffset)
774 {
775     jumpAddress += thunkOffset;
776 }
777                 if (!(tableRegion->isOffsetInRegion(jumpAddress))) {
778                     parsing_printf("\tentry %d [0x%lx] -> 0x%lx, invalid addr, skipping\n",
779                                    i, tableEntry, jumpAddress);
780                     continue;
781                 }
782                 parsing_printf("\tentry %d [0x%lx] -> 0x%lx\n",i,tableEntry,jumpAddress);
783
784                 outEdges.push_back(std::make_pair(jumpAddress, ET_INDIR));
785                 
786     }
787     if(outEdges.empty()) parsing_printf("\tno valid table entries, ret false\n");
788     return !outEdges.empty();
789     
790 }
791
792
793 bool IA_IAPI::computeTableBounds(Instruction::Ptr maxSwitchInsn,
794                                  Instruction::Ptr branchInsn,
795                                  Instruction::Ptr tableInsn,
796                                  bool foundJCCAlongTaken,
797                                  unsigned& tableSize,
798                                  unsigned& tableStride) const
799 {
800     assert(maxSwitchInsn && branchInsn);
801     Result compareBound = maxSwitchInsn->getOperand(1).getValue()->eval();
802     if(!compareBound.defined) return false;
803     tableSize = compareBound.convert<unsigned>();
804     // Sanity check the bounds; 32k tables would be an oddity, and larger is almost certainly
805     // a misparse
806     static const unsigned int maxTableSize = 32768;
807     if(tableSize > maxTableSize)
808     {
809         parsing_printf("\tmaxSwitch of %d above %d, BAILING OUT\n", tableSize, maxTableSize);
810         return false;
811     }
812     if(foundJCCAlongTaken)
813     {
814         if(branchInsn->getOperation().getID() == e_jbe ||
815            branchInsn->getOperation().getID() == e_jle)
816         {
817             tableSize++;
818         }
819     }
820     else
821     {
822         if(branchInsn->getOperation().getID() == e_jnbe ||
823            branchInsn->getOperation().getID() == e_jnle)
824         {
825             tableSize++;
826         }
827     }
828     parsing_printf("\tmaxSwitch set to %d\n", tableSize);
829     tableStride = img->getAddressWidth();
830     std::set<Expression::Ptr> tableInsnReadAddr;
831     tableInsn->getMemoryReadOperands(tableInsnReadAddr);
832     if(tableStride == 8)
833     {
834         static Immediate::Ptr four(new Immediate(Result(u8, 4)));
835         static BinaryFunction::funcT::Ptr multiplier(new BinaryFunction::multResult());
836         static Expression::Ptr dummy(new DummyExpr());
837         static BinaryFunction::Ptr scaleCheck(new BinaryFunction(four, dummy, u64, multiplier));
838         for(std::set<Expression::Ptr>::const_iterator curExpr = tableInsnReadAddr.begin();
839             curExpr != tableInsnReadAddr.end();
840             ++curExpr)
841         {
842             if((*curExpr)->isUsed(scaleCheck))
843             {
844                 tableSize = tableSize >> 1;
845                 parsing_printf("\tmaxSwitch revised to %d\n",tableSize);
846             }
847         }
848     }
849     return true;
850 }
851
852 bool IA_IAPI::isThunk() const {
853   // Before we go a-wandering, check the target
854     if (!img->isValidAddress(getCFT()))
855     {
856         parsing_printf("... Call to 0x%lx is invalid (outside code or data)\n",
857                        getCFT());
858         return false;
859     }
860
861     const unsigned char *target =
862             (const unsigned char *)img->getPtrToInstruction(getCFT());
863   // We're decoding two instructions: possible move and possible return.
864   // Check for move from the stack pointer followed by a return.
865     InstructionDecoder targetChecker(target, 2 * InstructionDecoder::maxInstructionLength,
866                                      img->getArch());
867     Instruction::Ptr thunkFirst = targetChecker.decode();
868     Instruction::Ptr thunkSecond = targetChecker.decode();
869     parsing_printf("... checking call target for thunk, insns are %s, %s\n", thunkFirst->format().c_str(),
870                    thunkSecond->format().c_str());
871     if(thunkFirst && (thunkFirst->getOperation().getID() == e_mov))
872     {
873         if(thunkFirst->isRead(stackPtr[img->getArch()]))
874         {
875             parsing_printf("... checking second insn\n");
876             if(!thunkSecond) {
877                 parsing_printf("...no second insn\n");
878                 return false;
879             }
880             if(thunkSecond->getCategory() != c_ReturnInsn)
881             {
882                 parsing_printf("...insn %s not a return\n", thunkSecond->format().c_str());
883                 return false;
884             }
885             return true;
886         }
887     }
888     parsing_printf("... real call found\n");
889     return false;
890 }
891
892 bool IA_IAPI::isTailCall(unsigned int) const
893 {
894     if(tailCall.first) {
895         parsing_printf("\tReturning cached tail call check result: %d\n", tailCall.second);
896         return tailCall.second;
897     }
898     tailCall.first = true;
899     if(allInsns.size() < 2) {
900         tailCall.second = false;
901         parsing_printf("\tindirect jump with too few insns, not a tail call\n");
902         return tailCall.second;
903     }
904     if(curInsn()->getCategory() == c_BranchInsn ||
905        curInsn()->getCategory() == c_CallInsn)
906     {
907         if(curInsn()->getCategory() == c_BranchInsn)
908         {
909             if(img->findFuncByEntry(getCFT()))
910             {
911                 parsing_printf("\tjump to 0x%lx, TAIL CALL\n", getCFT());
912                 tailCall.second = true;
913                 return tailCall.second;
914             }
915         }
916         std::map<Address, Instruction::Ptr>::const_iterator prevIter =
917                 allInsns.find(current);
918         --prevIter;
919         Instruction::Ptr prevInsn = prevIter->second;
920         if(prevInsn->getOperation().getID() == e_leave)
921         {
922             parsing_printf("\tprev insn was leave, TAIL CALL\n");
923             tailCall.second = true;
924             return tailCall.second;
925         }
926         if(prevInsn->getOperation().getID() == e_pop)
927         {
928             if(prevInsn->isWritten(framePtr[img->getArch()]))
929             {
930                 parsing_printf("\tprev insn was %s, TAIL CALL\n", prevInsn->format().c_str());
931                 tailCall.second = true;
932                 return tailCall.second;
933             }
934             parsing_printf("\tprev insn was %s, not tail call\n", prevInsn->format().c_str());
935         }
936     }
937     tailCall.second = false;
938     return tailCall.second;
939 }
940
941 bool IA_IAPI::checkEntry() const
942 {
943     if(curInsn()->getCategory() == c_BranchInsn)
944     {
945         // Indirect branches are okay; 
946         if(!curInsn()->allowsFallThrough() && getCFT())
947         {
948             return false;
949         }
950     }
951     if(curInsn()->getCategory() == c_ReturnInsn)
952     {
953         return false;
954     }
955     // We don't consider linkage snippets "functions".
956     dictionary_hash<Address, std::string> *pltFuncs = img->getPltFuncs();
957     if (pltFuncs && pltFuncs->defines(getAddr()))
958     {
959         return false;
960     }
961
962     return true;
963 }
964
965 bool IA_IAPI::savesFP() const
966 {
967     if(curInsn()->getOperation().getID() == e_push)
968     {
969         return(curInsn()->isRead(framePtr[img->getArch()]));
970     }
971     return false;
972 }
973
974 bool IA_IAPI::isStackFramePreamble(int& /*frameSize*/) const
975 {
976     if(savesFP())
977     {
978         InstructionDecoder tmp(dec);
979         std::vector<Instruction::Ptr> nextTwoInsns;
980         nextTwoInsns.push_back(tmp.decode());
981         nextTwoInsns.push_back(tmp.decode());
982         if(isFrameSetupInsn(nextTwoInsns[0]) ||
983            isFrameSetupInsn(nextTwoInsns[1]))
984         {
985             return true;
986         }
987     }
988     return false;
989 }
990
991 bool IA_IAPI::cleansStack() const
992 {
993     return (curInsn()->getCategory() == c_ReturnInsn) &&
994             curInsn()->getOperand(0).getValue();
995
996 }