1. When we find potential indexing variable with table stride being 1, we need to...
[dyninst.git] / parseAPI / src / SymbolicExpression.C
1 #include "SymbolicExpression.h"
2 #include "SymEval.h"
3 #include "Absloc.h"
4 #include "debug_parse.h"
5 #include "IndirectASTVisitor.h"
6 #include "CFG.h"
7 #include "CodeSource.h"
8 #include "CodeObject.h"
9 using namespace std;
10 using namespace Dyninst;
11 using namespace Dyninst::ParseAPI;
12 using namespace Dyninst::DataflowAPI;
13
14 CodeSource* SymbolicExpression::cs = NULL;
15
16 bool SymbolicExpression::ReadMemory(Address addr, uint64_t &v, int ) {
17     int addressWidth = cs->getAddressWidth();
18     if (addressWidth == 4) {
19         addr &= 0xffffffff;
20     }
21
22 #if defined(os_windows)
23     addr -= cs->loadAddress();
24 #endif
25     if (!cs->isCode(addr) && !cs->isData(addr)) return false;
26     v = *(const uint64_t *) cs->getPtrToInstruction(addr);
27 /*
28     switch (memoryReadSize) {
29         case 0:
30         case 8:
31             v = *(const uint64_t *) cs->getPtrToInstruction(addr);
32             break;
33         case 4:
34             v = *(const uint32_t *) cs->getPtrToInstruction(addr);
35             break;
36         case 2:
37             v = *(const uint16_t *) cs->getPtrToInstruction(addr);
38             break;
39         case 1:
40             v = *(const uint8_t *) cs->getPtrToInstruction(addr);
41             break;          
42         default:
43             parsing_printf("Invalid memory read size %d\n", memoryReadSize);
44             return false;
45     }
46 */
47     return true;
48 }
49
50 AST::Ptr SymbolicExpression::SimplifyRoot(AST::Ptr ast, Address addr, bool keepMultiOne) {
51     if (ast->getID() == AST::V_RoseAST) {
52         RoseAST::Ptr roseAST = boost::static_pointer_cast<RoseAST>(ast); 
53         
54         switch (roseAST->val().op) {
55             case ROSEOperation::invertOp:
56                 if (roseAST->child(0)->getID() == AST::V_RoseAST) {
57                     RoseAST::Ptr child = boost::static_pointer_cast<RoseAST>(roseAST->child(0));
58                     if (child->val().op == ROSEOperation::invertOp) return child->child(0);
59                 } else if (roseAST->child(0)->getID() == AST::V_ConstantAST) {
60                     ConstantAST::Ptr child = boost::static_pointer_cast<ConstantAST>(roseAST->child(0));
61                     size_t size = child->val().size;
62                     uint64_t val = child->val().val;
63                     if (size < 64) {
64                         uint64_t mask = (1ULL << size) - 1;
65                         val = (~val) & mask;
66                     } else
67                         val = ~val;
68                     return ConstantAST::create(Constant(val, size));
69                 }
70                 break;
71             case ROSEOperation::extendMSBOp: {
72                 return roseAST->child(0);
73             }
74             case ROSEOperation::extractOp: {
75                 if (roseAST->child(0)->getID() == AST::V_ConstantAST) {
76                     size_t size = roseAST->val().size;
77                     ConstantAST::Ptr child0 = boost::static_pointer_cast<ConstantAST>(roseAST->child(0));
78                     return ConstantAST::create(Constant(child0->val().val,size));
79                 }
80                 return roseAST->child(0);
81             }
82             case ROSEOperation::signExtendOp: {
83                 if (roseAST->child(0)->getID() == AST::V_ConstantAST && roseAST->child(1)->getID() == AST::V_ConstantAST) {
84                     ConstantAST::Ptr child0 = boost::static_pointer_cast<ConstantAST>(roseAST->child(0));
85                     ConstantAST::Ptr child1 = boost::static_pointer_cast<ConstantAST>(roseAST->child(1));
86                     uint64_t val = child0->val().val;
87                     if (val & (1 << (child0->val().size - 1))) {
88                         switch (child0->val().size) {
89                            case 16:
90                                val = val | SIGNEX_64_16;
91                                break;
92                            case 32:
93                                val = val | SIGNEX_64_32;
94                                break;
95                            default:
96                                break;
97                         }
98                     } 
99                     size_t size = child1->val().val;
100                     return ConstantAST::create(Constant(val,size));
101                 }                   
102                 return roseAST->child(0);
103             }
104             case ROSEOperation::concatOp: {         
105                 if (roseAST->child(0)->getID() == AST::V_ConstantAST && roseAST->child(1)->getID() == AST::V_ConstantAST) {
106                     ConstantAST::Ptr child0 = boost::static_pointer_cast<ConstantAST>(roseAST->child(0));
107                     ConstantAST::Ptr child1 = boost::static_pointer_cast<ConstantAST>(roseAST->child(1));
108                     uint64_t val = (child1->val().val << child0->val().size) + child0->val().val;
109                     size_t size = child1->val().size + child0->val().size;
110                     return ConstantAST::create(Constant(val,size));
111                 }                   
112                 return roseAST->child(0);
113             }
114             case ROSEOperation::addOp:
115                 // We simplify the addition as much as we can
116                 // Case 1: two constants
117                 if (roseAST->child(0)->getID() == AST::V_ConstantAST && roseAST->child(1)->getID() == AST::V_ConstantAST) {
118                     ConstantAST::Ptr child0 = boost::static_pointer_cast<ConstantAST>(roseAST->child(0));
119                     ConstantAST::Ptr child1 = boost::static_pointer_cast<ConstantAST>(roseAST->child(1));
120                     uint64_t val = child0->val().val + child1->val().val;
121                     size_t size;
122                     if (child0->val().size > child1->val().size)
123                         size = child0->val().size;
124                     else
125                         size = child1->val().size;
126                     return ConstantAST::create(Constant(val,size));
127                 }
128                 // Case 2: anything adding zero stays the same
129                 if (roseAST->child(0)->getID() == AST::V_ConstantAST) {
130                     ConstantAST::Ptr child = boost::static_pointer_cast<ConstantAST>(roseAST->child(0));
131                     if (child->val().val == 0) return roseAST->child(1);
132                 }
133                 if (roseAST->child(1)->getID() == AST::V_ConstantAST) {
134                     ConstantAST::Ptr child = boost::static_pointer_cast<ConstantAST>(roseAST->child(1));
135                     if (child->val().val == 0) return roseAST->child(0);
136                 }
137                 // Case 3: if v + v * c = v * (c+1), where v is a variable and c is a constant
138                 if (roseAST->child(0)->getID() == AST::V_VariableAST && roseAST->child(1)->getID() == AST::V_RoseAST) {
139                     RoseAST::Ptr rOp = boost::static_pointer_cast<RoseAST>(roseAST->child(1));
140                     if (rOp->val().op == ROSEOperation::uMultOp || rOp->val().op == ROSEOperation::sMultOp) {
141                         if (rOp->child(0)->getID() == AST::V_VariableAST && rOp->child(1)->getID() == AST::V_ConstantAST) {
142                             VariableAST::Ptr varAST1 = boost::static_pointer_cast<VariableAST>(roseAST->child(0));
143                             VariableAST::Ptr varAST2 = boost::static_pointer_cast<VariableAST>(rOp->child(0));
144                             if (varAST1->val().reg == varAST2->val().reg) {
145                                 ConstantAST::Ptr oldC = boost::static_pointer_cast<ConstantAST>(rOp->child(1));
146                                 ConstantAST::Ptr newC = ConstantAST::create(Constant(oldC->val().val + 1, oldC->val().size));
147                                 RoseAST::Ptr newRoot = RoseAST::create(ROSEOperation(rOp->val()), varAST1, newC);
148                                 return newRoot;
149                             }
150                         }
151                     }
152                 } 
153                 break;
154             case ROSEOperation::sMultOp:
155             case ROSEOperation::uMultOp:
156                 if (roseAST->child(0)->getID() == AST::V_ConstantAST && roseAST->child(1)->getID() == AST::V_ConstantAST) {
157                     ConstantAST::Ptr child0 = boost::static_pointer_cast<ConstantAST>(roseAST->child(0));
158                     ConstantAST::Ptr child1 = boost::static_pointer_cast<ConstantAST>(roseAST->child(1));
159                     return ConstantAST::create(Constant(child0->val().val * child1->val().val, 64));
160                 }
161                 if (roseAST->child(0)->getID() == AST::V_ConstantAST) {
162                     ConstantAST::Ptr child0 = boost::static_pointer_cast<ConstantAST>(roseAST->child(0));
163                     if (child0->val().val == 1 && !keepMultiOne) return roseAST->child(1);
164                 }
165
166                 if (roseAST->child(1)->getID() == AST::V_ConstantAST) {
167                     ConstantAST::Ptr child1 = boost::static_pointer_cast<ConstantAST>(roseAST->child(1));
168                     if (child1->val().val == 1 && !keepMultiOne) return roseAST->child(0);
169                 }
170                 break;
171             case ROSEOperation::xorOp:
172                 if (roseAST->child(0)->getID() == AST::V_VariableAST && roseAST->child(1)->getID() == AST::V_VariableAST) {
173                     VariableAST::Ptr child0 = boost::static_pointer_cast<VariableAST>(roseAST->child(0)); 
174                     VariableAST::Ptr child1 = boost::static_pointer_cast<VariableAST>(roseAST->child(1)); 
175                     if (child0->val() == child1->val()) {
176                         return ConstantAST::create(Constant(0 , 32));
177                     }
178                 }
179                 break;
180             case ROSEOperation::derefOp:
181                 // Any 8-bit value is bounded in [0,255].
182                 // Need to keep the length of the dereference if it is 8-bit.
183                 // However, dereference longer than 8-bit should be regarded the same.
184                 if (roseAST->child(0)->getID() == AST::V_ConstantAST) {
185                     uint64_t val = 0;
186                     ConstantAST::Ptr c = boost::static_pointer_cast<ConstantAST>(roseAST->child(0));
187                     Address addr = c->val().val;
188                     if (ReadMemory(addr, val, roseAST->val().size / 8)) {
189                         return ConstantAST::create(Constant(val, 64));
190                     }
191                 }
192                 if (roseAST->val().size == 8)
193                     return ast;
194                 else
195                     return RoseAST::create(ROSEOperation(ROSEOperation::derefOp), ast->child(0));
196                 break;
197             case ROSEOperation::shiftLOp:
198             case ROSEOperation::rotateLOp:
199                 if (roseAST->child(0)->getID() == AST::V_ConstantAST && roseAST->child(1)->getID() == AST::V_ConstantAST) {
200                     ConstantAST::Ptr child0 = boost::static_pointer_cast<ConstantAST>(roseAST->child(0));
201                     ConstantAST::Ptr child1 = boost::static_pointer_cast<ConstantAST>(roseAST->child(1));
202                     return ConstantAST::create(Constant(child0->val().val << child1->val().val, 64));
203                 }
204                 if (roseAST->child(1)->getID() == AST::V_ConstantAST) {
205                     parsing_printf("keep multi one %d\n", keepMultiOne);
206                     ConstantAST::Ptr child1 = boost::static_pointer_cast<ConstantAST>(roseAST->child(1));
207                     if (child1->val().val == 0 && !keepMultiOne) return roseAST->child(0);
208                 }
209                 break;
210             case ROSEOperation::andOp:
211                 if (roseAST->child(0)->getID() == AST::V_ConstantAST && roseAST->child(1)->getID() == AST::V_ConstantAST) {
212                     ConstantAST::Ptr child0 = boost::static_pointer_cast<ConstantAST>(roseAST->child(0));
213                     ConstantAST::Ptr child1 = boost::static_pointer_cast<ConstantAST>(roseAST->child(1));
214                     return ConstantAST::create(Constant(child0->val().val & child1->val().val, 64));
215                 }
216                 break;
217             case ROSEOperation::orOp:
218                 if (roseAST->child(0)->getID() == AST::V_ConstantAST && roseAST->child(1)->getID() == AST::V_ConstantAST) {
219                     ConstantAST::Ptr child0 = boost::static_pointer_cast<ConstantAST>(roseAST->child(0));
220                     ConstantAST::Ptr child1 = boost::static_pointer_cast<ConstantAST>(roseAST->child(1));
221                     return ConstantAST::create(Constant(child0->val().val | child1->val().val, 64));
222                 }
223                 break;
224             case ROSEOperation::ifOp:
225                 if (roseAST->child(0)->getID() == AST::V_ConstantAST) {
226                     ConstantAST::Ptr c = boost::static_pointer_cast<ConstantAST>(roseAST->child(0));
227                     if (c->val().val != 0) {
228                         return roseAST->child(1);
229                     } else {
230                         return roseAST->child(2);
231                     }
232
233                 }
234                 break;
235             default:
236                 break;
237
238         }
239     } else if (ast->getID() == AST::V_VariableAST) {
240         VariableAST::Ptr varAST = boost::static_pointer_cast<VariableAST>(ast);
241         if (varAST->val().reg.absloc().isPC()) {
242             MachRegister pc = varAST->val().reg.absloc().reg();     
243             return ConstantAST::create(Constant(addr, getArchAddressWidth(pc.getArchitecture()) * 8));
244         }
245         // We do not care about the address of the a-loc
246         // because we will keep tracking the changes of 
247         // each a-loc. Also, this brings a benefit that
248         // we can directly use ast->isStrictEqual() to 
249         // compare two ast.
250         return VariableAST::create(Variable(varAST->val().reg));
251     } else if (ast->getID() == AST::V_ConstantAST) {
252         ConstantAST::Ptr constAST = boost::static_pointer_cast<ConstantAST>(ast);
253         size_t size = constAST->val().size;
254         uint64_t val = constAST->val().val;     
255         if (size == 32)
256             if (!(val & (1ULL << (size - 1))))
257                 return ConstantAST::create(Constant(val, 64));
258     }
259
260     return ast;
261 }
262
263
264 AST::Ptr SymbolicExpression::SimplifyAnAST(AST::Ptr ast, Address addr, bool keepMultiOne) {
265     SimplifyVisitor sv(addr, keepMultiOne);
266     ast->accept(&sv);
267     return SimplifyRoot(ast, addr, keepMultiOne);
268 }
269
270 bool SymbolicExpression::ContainAnAST(AST::Ptr root, AST::Ptr check) {
271     if (*root == *check) return true;
272     bool ret = false;
273     unsigned totalChildren = root->numChildren();
274     for (unsigned i = 0 ; i < totalChildren && !ret; ++i) {
275         ret |= ContainAnAST(root->child(i), check);
276     }
277     return ret;
278 }
279
280
281 AST::Ptr SymbolicExpression::DeepCopyAnAST(AST::Ptr ast) {
282     if (ast->getID() == AST::V_RoseAST) {
283         RoseAST::Ptr roseAST = boost::static_pointer_cast<RoseAST>(ast);
284         AST::Children kids;
285         unsigned totalChildren = ast->numChildren();
286         for (unsigned i = 0 ; i < totalChildren; ++i) {
287             kids.push_back(DeepCopyAnAST(ast->child(i)));
288         }
289         return RoseAST::create(ROSEOperation(roseAST->val()), kids);
290     } else if (ast->getID() == AST::V_VariableAST) {
291         VariableAST::Ptr varAST = boost::static_pointer_cast<VariableAST>(ast);
292         return VariableAST::create(Variable(varAST->val()));
293     } else if (ast->getID() == AST::V_ConstantAST) {
294         ConstantAST::Ptr constAST = boost::static_pointer_cast<ConstantAST>(ast);
295         return ConstantAST::create(Constant(constAST->val()));
296     } else if (ast->getID() == AST::V_BottomAST) {
297         BottomAST::Ptr bottomAST = boost::static_pointer_cast<BottomAST>(ast);
298         return BottomAST::create(bottomAST->val());
299     }
300     fprintf(stderr, "ast type %d, %s\n", ast->getID(), ast->format().c_str());
301     assert(0);
302         return AST::Ptr();
303 }
304
305 pair<AST::Ptr, bool> SymbolicExpression::ExpandAssignment(Assignment::Ptr assign, bool keepMultiOne) {
306     if (expandCache.find(assign) != expandCache.end()) {
307         AST::Ptr ast = expandCache[assign];
308         if (ast) {
309             if (!keepMultiOne) ast = SimplifyAnAST(ast, 0, keepMultiOne);
310             return make_pair(ast, true);
311         } 
312         else {
313             return make_pair(ast, false);
314         }
315     } else {
316         parsing_printf("\t\tExpanding instruction @ %x: %s, assignment %s\n", assign->addr(), assign->insn()->format().c_str(), assign->format().c_str());
317         pair<AST::Ptr, bool> expandRet = SymEval::expand(assign, false);
318         if (expandRet.second && expandRet.first) {
319             parsing_printf("Original expand: %s\n", expandRet.first->format().c_str());
320             AST::Ptr calculation = SimplifyAnAST(expandRet.first, 
321                                                  PCValue(assign->addr(),
322                                                          assign->insn()->size(),
323                                                          assign->block()->obj()->cs()->getArch()),
324                                                  true);
325             expandCache[assign] = calculation;
326         } else {
327             if (expandRet.first == NULL) {
328                 parsing_printf("\t\t\t expansion returned null ast\n");
329             }
330             if (expandRet.second == false) {
331                 parsing_printf("\t\t\t expansion returned false\n");
332             }
333             expandCache[assign] = AST::Ptr();
334         }
335         AST::Ptr ast = expandCache[assign];
336         if (ast && !keepMultiOne) ast = SimplifyAnAST(ast, 0, keepMultiOne);
337         return make_pair( ast, expandRet.second );
338     }
339 }
340
341 AST::Ptr SymbolicExpression::SubstituteAnAST(AST::Ptr ast, const map<AST::Ptr, AST::Ptr> &aliasMap) {
342     for (auto ait = aliasMap.begin(); ait != aliasMap.end(); ++ait)
343         if (*ast == *(ait->first)) {
344             return ait->second;
345         }
346     unsigned totalChildren = ast->numChildren();
347     for (unsigned i = 0 ; i < totalChildren; ++i) {
348         ast->setChild(i, SubstituteAnAST(ast->child(i), aliasMap));
349     }
350     if (ast->getID() == AST::V_VariableAST) {
351         // If this variable is not in the aliasMap yet,
352         // this variable is from the input.
353         VariableAST::Ptr varAST = boost::static_pointer_cast<VariableAST>(ast);
354         return VariableAST::create(Variable(varAST->val().reg, 1));
355     }
356     return ast;
357 }
358
359 Address SymbolicExpression::PCValue(Address cur, size_t insnSize, Architecture a) {
360     switch (a) {
361         case Arch_x86:
362         case Arch_x86_64:
363             return cur + insnSize;
364         case Arch_aarch64:
365         case Arch_ppc32:
366         case Arch_ppc64:
367             return cur;
368         case Arch_aarch32:
369         case Arch_none:
370             assert(0);
371     }    
372     return cur + insnSize;
373 }