Start to split jump table analysis to two different slices.
[dyninst.git] / parseAPI / src / IndirectASTVisitor.C
1 #include "dyntypes.h"
2 #include "IndirectASTVisitor.h"
3 #include "debug_parse.h"
4 #include "CodeObject.h"
5 #include <algorithm>
6 #include "SymbolicExpression.h"
7 using namespace Dyninst::ParseAPI;
8 #define SIGNEX_64_32 0xffffffff00000000LL
9 #define SIGNEX_64_16 0xffffffffffff0000LL
10 #define SIGNEX_64_8  0xffffffffffffff00LL
11 #define SIGNEX_32_16 0xffff0000
12 #define SIGNEX_32_8 0xffffff00
13
14
15 AST::Ptr SimplifyVisitor::visit(DataflowAPI::RoseAST *ast) {
16         unsigned totalChildren = ast->numChildren();
17         for (unsigned i = 0 ; i < totalChildren; ++i) {
18             ast->child(i)->accept(this);
19             ast->setChild(i, SymbolicExpression::SimplifyRoot(ast->child(i), addr));
20         }
21         return AST::Ptr();
22 }
23 /*
24 AST::Ptr BoundCalcVisitor::visit(DataflowAPI::RoseAST *ast) {
25     BoundValue *astBound = boundFact.GetBound(ast);
26     if (astBound != NULL) {
27         bound.insert(make_pair(ast, new BoundValue(*astBound)));
28         return AST::Ptr();
29     }
30     unsigned totalChildren = ast->numChildren();
31     for (unsigned i = 0 ; i < totalChildren; ++i) {
32         ast->child(i)->accept(this);
33     }
34     switch (ast->val().op) {
35         case ROSEOperation::addOp:
36             if (IsResultBounded(ast->child(0)) && IsResultBounded(ast->child(1))) {         
37                 BoundValue* val = new BoundValue(*GetResultBound(ast->child(0)));               
38                 val->Add(*GetResultBound(ast->child(1)));
39                 if (*val != BoundValue::top)
40                     bound.insert(make_pair(ast, val));
41             }       
42             break;
43         case ROSEOperation::invertOp:
44             if (IsResultBounded(ast->child(0))) {
45                 BoundValue *val = new BoundValue(*GetResultBound(ast->child(0)));
46                 if (val->tableReadSize)
47                     val->isInverted = true;
48                 else {
49                     val->Invert();
50                 }
51                 if (*val != BoundValue::top)
52                     bound.insert(make_pair(ast,val));
53             }
54             break;
55         case ROSEOperation::andOp: {
56             // For and operation, even one of them is a top,
57             // we may still produce bound result.
58             // For example, top And 0[3,3] => 1[0,3]
59             //
60             // the bound produced by and may be more relaxed than
61             // a cmp bound not found yet. So we only apply and
62             // bound when this is the last attempt
63             if (handleOneByteRead) {
64                 BoundValue *val = NULL;
65                 if (IsResultBounded(ast->child(0)))
66                     val = new BoundValue(*GetResultBound(ast->child(0)));
67                 else
68                     val = new BoundValue(BoundValue::top);
69                 if (IsResultBounded(ast->child(1)))
70                     val->And(*GetResultBound(ast->child(1)));
71                 else
72                     val->And(StridedInterval::top);
73                 // the result of an AND operation should not be
74                 // the table lookup. Set all other values to default
75                 val->ClearTableCheck();
76                 if (*val != BoundValue::top)
77                     bound.insert(make_pair(ast, val));
78             }
79             break;
80         }
81         case ROSEOperation::sMultOp:
82         case ROSEOperation::uMultOp:
83             if (IsResultBounded(ast->child(0)) && IsResultBounded(ast->child(1))) {
84                 BoundValue *val = new BoundValue(*GetResultBound(ast->child(0)));
85                 val->Mul(*GetResultBound(ast->child(1)));
86                 if (*val != BoundValue::top)
87                     bound.insert(make_pair(ast, val));
88             }
89             break;
90         case ROSEOperation::shiftLOp:
91             if (IsResultBounded(ast->child(0)) && IsResultBounded(ast->child(1))) {
92                 BoundValue *val = new BoundValue(*GetResultBound(ast->child(0)));
93                 val->ShiftLeft(*GetResultBound(ast->child(1)));
94                 if (*val != BoundValue::top)
95                     bound.insert(make_pair(ast, val));
96             }
97             break;
98         case ROSEOperation::shiftROp:
99             if (IsResultBounded(ast->child(0)) && IsResultBounded(ast->child(1))) {
100                 BoundValue *val = new BoundValue(*GetResultBound(ast->child(0)));
101                 val->ShiftRight(*GetResultBound(ast->child(1)));
102                 if (*val != BoundValue::top)
103                     bound.insert(make_pair(ast, val));
104             }
105             break;
106         case ROSEOperation::derefOp: 
107             if (IsResultBounded(ast->child(0))) {
108                 BoundValue *val = new BoundValue(*GetResultBound(ast->child(0)));
109                 val->MemoryRead(block, derefSize);
110                 if (*val != BoundValue::top)
111                     bound.insert(make_pair(ast, val));
112             } else if (handleOneByteRead && ast->val().size == 8) {
113                 // Any 8-bit value is bounded in [0,255]
114                 // But I should only do this when I know the read 
115                 // itself is not a jump table
116                 bound.insert(make_pair(ast, new BoundValue(StridedInterval(1,0,255))));
117             }
118             break;
119         case ROSEOperation::orOp: 
120             if (IsResultBounded(ast->child(0)) && IsResultBounded(ast->child(1))) {
121                 BoundValue *val = new BoundValue(*GetResultBound(ast->child(0)));
122                 val->Or(*GetResultBound(ast->child(1)));
123                 if (*val != BoundValue::top)
124                     bound.insert(make_pair(ast, val));
125             }
126             break;
127         case ROSEOperation::ifOp:
128             if (IsResultBounded(ast->child(1)) && IsResultBounded(ast->child(2))) {
129                 BoundValue *val = new BoundValue(*GetResultBound(ast->child(1)));
130                 val->Join(*GetResultBound(ast->child(2)), block);
131                 if (*val != BoundValue::top)
132                     bound.insert(make_pair(ast, val));
133             }
134         default:
135             break;
136     }
137     return AST::Ptr();
138    
139 }
140
141 AST::Ptr BoundCalcVisitor::visit(DataflowAPI::ConstantAST *ast) {
142     const Constant &v = ast->val();
143     int64_t value = v.val;
144     if (v.size != 1 && v.size != 64 && (value & (1ULL << (v.size - 1)))) {
145         // Compute the two complements in bits of v.size
146         // and change it to a negative number
147         value = -(((~value) & ((1ULL << v.size) - 1)) + 1);
148     }
149     bound.insert(make_pair(ast, new BoundValue(value)));
150     return AST::Ptr();
151 }
152
153 AST::Ptr BoundCalcVisitor::visit(DataflowAPI::VariableAST *ast) {
154     BoundValue *astBound = boundFact.GetBound(ast);
155     if (astBound != NULL) 
156         bound.insert(make_pair(ast, new BoundValue(*astBound)));
157     return AST::Ptr();
158 }
159
160 BoundValue* BoundCalcVisitor::GetResultBound(AST::Ptr ast) {
161     if (IsResultBounded(ast)) {
162         return bound.find(ast.get())->second;
163     } else {
164         return NULL;
165     }       
166 }
167
168 BoundCalcVisitor::~BoundCalcVisitor() {
169     for (auto bit = bound.begin(); bit != bound.end(); ++bit)
170         if (bit->second != NULL)
171             delete bit->second;
172     bound.clear();
173 }
174
175
176 AST::Ptr JumpCondVisitor::visit(DataflowAPI::RoseAST *ast) {
177     if (ast->val().op == ROSEOperation::invertOp) {
178         invertFlag = true;
179         return AST::Ptr();
180     }
181     unsigned totalChildren = ast->numChildren();
182     for (unsigned i = 0 ; i < totalChildren; ++i) {
183         ast->child(i)->accept(this);
184     }
185     return AST::Ptr();
186 }
187
188 AST::Ptr ComparisonVisitor::visit(DataflowAPI::RoseAST *ast) {
189     // For cmp type instruction setting zf
190     // Looking like <eqZero?>(<add>(<V([x86_64::rbx])>,<Imm:8>,),)
191     // Assuming ast has been simplified
192     if (ast->val().op == ROSEOperation::equalToZeroOp) {
193         bool minuendIsZero = true;
194         AST::Ptr child = ast->child(0); 
195         if (child->getID() == AST::V_RoseAST) {
196             RoseAST::Ptr childRose = boost::static_pointer_cast<RoseAST>(child);
197             if (childRose->val().op == ROSEOperation::addOp) {
198                 minuendIsZero = false;
199                 subtrahend = childRose->child(0);
200                 minuend = childRose->child(1);
201                 // If the minuend is a constant, then
202                 // the minuend is currently in its two-complement form
203                 if (minuend->getID() == AST::V_ConstantAST) {
204                     ConstantAST::Ptr constAST = boost::static_pointer_cast<ConstantAST>(minuend);
205                     uint64_t val = constAST->val().val;
206                     int size = constAST->val().size;
207                     if (size < 64) 
208                         val = ((~val)+ 1) & ((1ULL << size) - 1);
209                     else if (size == 64)
210                         val = (~val) + 1;
211                     else
212                         parsing_printf("WARNING: constant bit size %d exceeds 64!\n", size);
213                     minuend = ConstantAST::create(Constant(val, size));
214                 } else if (minuend->getID() == AST::V_RoseAST) {
215                     RoseAST::Ptr sub = boost::static_pointer_cast<RoseAST>(minuend);
216                     minuend = AST::Ptr();
217                     if (sub->val().op == ROSEOperation::addOp && sub->child(0)->getID() == AST::V_RoseAST) {
218                         sub = boost::static_pointer_cast<RoseAST>(sub->child(0));
219                         if (sub->val().op == ROSEOperation::invertOp) {
220                             // Otherwise, the minuend ast is in the form of add(invert(minuend), 1)
221                             // Need to extract the real minuend
222                              minuend = sub->child(0);
223                         }
224                     }
225                 }
226             }   
227         } 
228         if (minuendIsZero) {
229             // The minuend is 0, thus the add operation is subsume.
230              subtrahend = ast->child(0);
231              minuend = ConstantAST::create(Constant(0));
232         }
233     }
234     return AST::Ptr();
235 }
236 */
237 JumpTableFormatVisitor::JumpTableFormatVisitor(ParseAPI::Block *bl) {
238     b = bl;
239     numOfVar = 0;
240     findIncorrectFormat = false;
241     findTableBase = false;
242     findIndex = false;
243 }
244
245 AST::Ptr JumpTableFormatVisitor::visit(DataflowAPI::RoseAST *ast) {
246     unsigned totalChildren = ast->numChildren();
247     for (unsigned i = 0 ; i < totalChildren; ++i) {
248         ast->child(i)->accept(this);
249     }
250
251     if (ast->val().op == ROSEOperation::derefOp) {
252         // We only check the first memory read
253         if (ast->child(0)->getID() == AST::V_RoseAST) {
254             RoseAST::Ptr roseAST = boost::static_pointer_cast<RoseAST>(ast->child(0));
255             if (roseAST->val().op == ROSEOperation::derefOp) {
256                 // Two directly nested memory accesses cannot be jump tables
257                 parsing_printf("Two directly nested memory access, not jump table format\n");
258                 findIncorrectFormat = true;
259                 return AST::Ptr();
260             }
261         }
262     } else if (ast->val().op == ROSEOperation::addOp) {
263         Address tableBase = 0;
264         if (ast->child(0)->getID() == AST::V_ConstantAST && PotentialIndexing(ast->child(1))) {
265             ConstantAST::Ptr constAST = boost::static_pointer_cast<ConstantAST>(ast->child(0));
266             tableBase = (Address)constAST->val().val;
267         }
268         if (ast->child(1)->getID() == AST::V_ConstantAST && PotentialIndexing(ast->child(0))) {
269             ConstantAST::Ptr constAST = boost::static_pointer_cast<ConstantAST>(ast->child(1));
270             tableBase = (Address)constAST->val().val;
271         }
272         if (tableBase) {
273             Architecture arch = b->obj()->cs()->getArch();
274             if (arch == Arch_x86) {
275                 tableBase &= 0xffffffff;
276             }
277 #if defined(os_windows)
278             tableBase -= b->obj()->cs()->loadAddress();
279 #endif
280             if (!b->obj()->cs()->isValidAddress(tableBase)) {
281                 parsing_printf("\ttableBase 0x%lx invalid, not jump table format\n", tableBase);
282                 findIncorrectFormat = true;
283                 return AST::Ptr();
284             }
285             if (!b->obj()->cs()->isReadOnly(tableBase)) {
286                 parsing_printf("\ttableBase 0x%lx not read only, not jump table format\n", tableBase);
287                 findIncorrectFormat = true;
288                 return AST::Ptr();
289             }
290             // Note that this table base may not be within a memory read.
291             // Functions with variable arguments often have an indirect jump with form:
292             // targetBase - index * 4
293             // We merge this special case with other general jump table cases.
294             findTableBase = true;
295        }
296     } else if (ast->val().op == ROSEOperation::uMultOp || ast->val().op == ROSEOperation::sMultOp) {
297         if (ast->child(0)->getID() == AST::V_ConstantAST && ast->child(1)->getID() == AST::V_VariableAST) {
298             findIndex = true;
299             numOfVar++;
300             VariableAST::Ptr varAst = boost::static_pointer_cast<VariableAST>(ast->child(1));
301             index = varAst->val().reg;
302             return AST::Ptr();
303         }
304         if (ast->child(1)->getID() == AST::V_ConstantAST && ast->child(0)->getID() == AST::V_VariableAST) {
305             findIndex = true;
306             numOfVar++;
307             VariableAST::Ptr varAst = boost::static_pointer_cast<VariableAST>(ast->child(0));
308             index = varAst->val().reg;
309             return AST::Ptr();
310         }
311     }
312     return AST::Ptr();
313 }
314
315 AST::Ptr JumpTableFormatVisitor::visit(DataflowAPI::VariableAST *ast) {
316     numOfVar++;
317     return AST::Ptr();
318 }
319
320 bool JumpTableFormatVisitor::PotentialIndexing(AST::Ptr ast) {
321     if (ast->getID() == AST::V_VariableAST) return true;
322     if (ast->getID() == AST::V_RoseAST) {
323         RoseAST::Ptr r = boost::static_pointer_cast<RoseAST>(ast);
324         if (r->val().op == ROSEOperation::uMultOp || r->val().op == ROSEOperation::sMultOp) return true;
325     }
326     return false;
327 }
328
329 /*
330 bool PerformTableRead(BoundValue &target, set<int64_t> & jumpTargets, CodeSource *cs) {
331     if (target.tableReadSize > 0 && target.interval.stride == 0) {
332         // This is a PC-relative read to variable, not a table read
333         return false;
334     }
335     Address tableBase = (Address)target.interval.low;
336     Address tableLastEntry = (Address)target.interval.high;
337     int addressWidth = cs->getAddressWidth();
338     if (addressWidth == 4) {
339         tableBase &= 0xffffffff;
340         tableLastEntry &= 0xffffffff;
341     }
342
343 #if defined(os_windows)
344     tableBase -= cs->loadAddress();
345     tableLastEntry -= cs->loadAddress();
346 #endif
347
348     if (!cs->isCode(tableBase) && !cs->isData(tableBase)) {
349         parsing_printf("\ttableBase 0x%lx invalid, returning false\n", tableBase);
350         parsing_printf("Not jump table format!\n");
351         return false;
352     }
353     if (!cs->isReadOnly(tableBase)) {
354         parsing_printf("\ttableBase 0x%lx not read only, returning false\n", tableBase);
355         parsing_printf("Not jump table format!\n");
356         return false;
357     }
358
359
360     for (Address tableEntry = tableBase; tableEntry <= tableLastEntry; tableEntry += target.interval.stride) {
361         if (!cs->isCode(tableEntry) && !cs->isData(tableEntry)) continue;
362         if (!cs->isReadOnly(tableEntry)) continue;
363         int64_t targetAddress = 0;
364         if (target.tableReadSize > 0) {
365             switch (target.tableReadSize) {
366                 case 8:
367                     targetAddress = *(const uint64_t *) cs->getPtrToInstruction(tableEntry);
368                     break;
369                 case 4:
370                     targetAddress = *(const uint32_t *) cs->getPtrToInstruction(tableEntry);
371                     if (target.isZeroExtend) break;
372                     if ((addressWidth == 8) && (targetAddress & 0x80000000)) {
373                         targetAddress |= SIGNEX_64_32;
374                     }
375                     break;
376                 case 2:
377                     targetAddress = *(const uint16_t *) cs->getPtrToInstruction(tableEntry);
378                     if (target.isZeroExtend) break;
379                     if ((addressWidth == 8) && (targetAddress & 0x8000)) {
380                         targetAddress |= SIGNEX_64_16;
381                     }
382                     if ((addressWidth == 4) && (targetAddress & 0x8000)) {
383                         targetAddress |= SIGNEX_32_16;
384                     }
385
386                     break;
387                 case 1:
388                     targetAddress = *(const uint8_t *) cs->getPtrToInstruction(tableEntry);
389                     if (target.isZeroExtend) break;
390                     if ((addressWidth == 8) && (targetAddress & 0x80)) {
391                         targetAddress |= SIGNEX_64_8;
392                     }
393                     if ((addressWidth == 4) && (targetAddress & 0x80)) {
394                         targetAddress |= SIGNEX_32_8;
395                     }
396
397                     break;
398
399                 default:
400                     parsing_printf("Invalid memory read size %d\n", target.tableReadSize);
401                     return false;
402             }
403             targetAddress *= target.multiply;
404             if (target.targetBase != 0) {
405                 if (target.isSubReadContent) 
406                     targetAddress = target.targetBase - targetAddress;
407                 else 
408                     targetAddress += target.targetBase; 
409
410             }
411 #if defined(os_windows)
412             targetAddress -= cs->loadAddress();
413 #endif
414         } else targetAddress = tableEntry;
415
416         if (addressWidth == 4) targetAddress &= 0xffffffff;
417         parsing_printf("Jumping to target %lx,", targetAddress);
418         if (cs->isCode(targetAddress)) {
419             // Jump tables may contain may repetitious entries.
420             // We only want to create one edge for disctinct each jump target.
421             jumpTargets.insert(targetAddress);
422         }
423         // If the jump target is resolved to be a constant, 
424         if (target.interval.stride == 0) break;
425     }
426     return true;
427 }
428 */