Only stop slicing when encountering missing instruction semantics on ARM
[dyninst.git] / parseAPI / src / JumpTablePred.C
1 #include "dyntypes.h"
2 #include "Node.h"
3 #include "Graph.h"
4
5 #include "debug_parse.h"
6 #include "CodeObject.h"
7 #include "JumpTablePred.h"
8 #include "IndirectASTVisitor.h"
9
10 #include "Instruction.h"
11 #include "InstructionDecoder.h"
12
13 #include "AbslocInterface.h"
14 #include "SymEval.h"
15
16 using namespace Dyninst;
17 using namespace Dyninst::DataflowAPI;
18 using namespace Dyninst::ParseAPI;
19 using namespace Dyninst::InstructionAPI;
20 #define SIGNEX_64_32 0xffffffff00000000LL
21 #define SIGNEX_64_16 0xffffffffffff0000LL
22 #define SIGNEX_64_8  0xffffffffffffff00LL
23 #define SIGNEX_32_16 0xffff0000
24 #define SIGNEX_32_8 0xffffff00
25
// Assume the table contains fewer than this many entries.
27 #define MAX_TABLE_ENTRY 1000000
28 static void BuildEdgesAux(SliceNode::Ptr srcNode,
29                           ParseAPI::Block* curBlock,
30                           map<ParseAPI::Block*, map<AssignmentPtr, SliceNode::Ptr> > &targetMap,
31                           GraphPtr newG,
32                           set<ParseAPI::Block*> &visit,
33                           EdgeTypeEnum t,
34                           set<ParseAPI::Edge*> allowedEdges) {                   
35     if (targetMap.find(curBlock) != targetMap.end()) {
36         // This block contains at least one silce node 
37         // that is reachable from the source DFS node
38         map<AssignmentPtr, SliceNode::Ptr> &candNodes = targetMap[curBlock];
39         Address addr = 0;
40         for (auto cit = candNodes.begin(); cit != candNodes.end(); ++cit)
41             // The node has to be either in a different block from the source node
42             // or in the same block but has a larger address to be considered 
43             // reachable from the source node
44             if (cit->first->addr() > srcNode->addr() || curBlock != srcNode->block())
45                 if (addr == 0 || addr > cit->first->addr()) {
46                     addr = cit->first->addr();
47                 }
48         if (addr != 0) {
49             // There may be several assignments locating 
50             // at the same address. Need to connecting all.
51             if (t == _edgetype_end_) t = FALLTHROUGH;
52             for (auto cit = candNodes.begin(); cit != candNodes.end(); ++cit)
53                 if (cit->first->addr() > srcNode->addr() || curBlock != srcNode->block())
54                     if (addr == cit->first->addr()) {
55                         newG->insertPair(srcNode, cit->second, TypedSliceEdge::create(srcNode, cit->second, t));
56                     }
57             return;
58         }
59     }
60
61     if (visit.find(curBlock) != visit.end()) return;
62     visit.insert(curBlock);
63     for (auto eit = curBlock->targets().begin(); eit != curBlock->targets().end(); ++eit) {
64         // Xiaozhu:
65         // Our current slicing code ignores tail calls 
66         // (the slice code only checks if an edge type is CALL or not)
67         // so, I should be consistent here.
68         // If the slice code considers tail calls, need to change
69         // the predicate to (*eit)->interproc()
70         if ((*eit)->type() != CALL && (*eit)->type() != RET && allowedEdges.find(*eit) != allowedEdges.end()) {
71             EdgeTypeEnum newT = t; 
72             if (t == _edgetype_end_) {
73                 if ((*eit)->type() == COND_TAKEN || (*eit)->type() == COND_NOT_TAKEN) 
74                     newT = (*eit)->type();
75                 else 
76                     newT = FALLTHROUGH;
77             } 
78             BuildEdgesAux(srcNode, (*eit)->trg(), targetMap, newG, visit, newT, allowedEdges);     
79         }
80     }
81 }                         
82
83
84 static void BuildEdges(SliceNode::Ptr curNode,
85                        map<ParseAPI::Block*, map<AssignmentPtr, SliceNode::Ptr> > &targetMap,
86                        GraphPtr newG,
87                        set<ParseAPI::Edge*> &allowedEdges) {
88     set<ParseAPI::Block*> visit;                     
89     BuildEdgesAux(curNode, curNode->block(), targetMap, newG, visit, _edgetype_end_, allowedEdges);
90 }                      
91
92 static bool AssignIsZF(Assignment::Ptr a) {
93     return a->out().absloc().type() == Absloc::Register &&
94            (a->out().absloc().reg() == MachRegister::getZeroFlag(a->out().absloc().reg().getArchitecture()));
95 }
96
97 static bool IsPushAndChangeSP(Assignment::Ptr a) {
98     entryID id = a->insn()->getOperation().getID();
99     if (id != e_push) return false;
100     Absloc aloc = a->out().absloc();
101     if (aloc.type() == Absloc::Register && aloc.reg().isStackPointer()) return true;
102     return false;;
103
104 }
105
106 static int AdjustGraphEntryAndExit(GraphPtr gp) {
107     int nodeCount = 0;
108     NodeIterator gbegin, gend;
109     gp->allNodes(gbegin, gend);
110     for (; gbegin != gend; ++gbegin) {
111         ++nodeCount;
112         Node::Ptr ptr = *gbegin;
113         if (!ptr->hasInEdges()) gp->insertEntryNode(ptr);
114         if (!ptr->hasOutEdges()) gp->insertExitNode(ptr);
115     }
116     return nodeCount;
117 }
118
// Build a reduced analysis graph from the assignments collected during
// slicing (currentAssigns).  Nodes are the assignments considered
// relevant to the jump target; edges reflect reachability along the
// CFG edges the slicer visited (visitedEdges).  The result feeds the
// bound-facts calculation.
GraphPtr JumpTablePred::BuildAnalysisGraph(set<ParseAPI::Edge*> &visitedEdges) {
    GraphPtr newG = Graph::createGraph();
    
    NodeIterator gbegin, gend;
    parsing_printf("\t\t start to build analysis graph\n");

    // Create a map to help find whether we have reached a node
    map<ParseAPI::Block*, map<AssignmentPtr, SliceNode::Ptr> > targetMap;

    // Assignments that are at these addresses have flag assignment colocated
    set<Address> shouldSkip;
    for (auto ait = currentAssigns.begin(); ait != currentAssigns.end(); ++ait) {
        if (AssignIsZF(*ait))
            shouldSkip.insert((*ait)->addr());
    }
    parsing_printf("\t\t calculate skipped nodes\n");

    // We only need one assignment from xchg instruction at each address
    set<Address> xchgCount;
    set<Assignment::Ptr> xchgAssign;
    for (auto ait = currentAssigns.begin(); ait != currentAssigns.end(); ++ait) {
        if ((*ait)->insn()->getOperation().getID() == e_xchg) {
            if (xchgCount.find( (*ait)->addr() ) != xchgCount.end() ) continue;
            xchgCount.insert((*ait)->addr());
            xchgAssign.insert(*ait);
        }
    }
    parsing_printf("\t\t calculate xchg assignments\n");
   
    // Create one slice node per assignment we keep.  We drop:
    //   - non-ZF assignments at addresses that also have a ZF assignment,
    //   - pushes that update the stack pointer,
    //   - memory writes that do not alias a previously recorded read,
    //   - duplicate xchg assignments at the same address.
    for (auto ait = currentAssigns.begin(); ait != currentAssigns.end(); ++ait) {
        Assignment::Ptr a = *ait;
        if (   (AssignIsZF(a) || shouldSkip.find(a->addr()) == shouldSkip.end()) 
            && !IsPushAndChangeSP(a)
            && (!a->insn()->writesMemory() || MatchReadAST(a))) {
            if (a->insn()->getOperation().getID() == e_xchg && xchgAssign.find(a) == xchgAssign.end()) continue;
            SliceNode::Ptr newNode = SliceNode::create(a, a->block(), a->func());
            targetMap[a->block()][a] = newNode;
            newG->addNode(newNode);
        }
    }
    parsing_printf("\t\t calculate nodes in the new graph\n");
    // Start from each node to do DFS and build edges
    newG->allNodes(gbegin, gend);
    for (; gbegin != gend; ++gbegin) {
        SliceNode::Ptr node = boost::static_pointer_cast<SliceNode>(*gbegin);
        BuildEdges(node, targetMap, newG, visitedEdges);
    }
    parsing_printf("\t\t calculate edges in the new graph\n");

    // Build a virtual exit node and connect every node lacking
    // out-edges to it, giving the graph a single sink.
    SliceNode::Ptr virtualExit = SliceNode::create(Assignment::Ptr(), NULL, NULL);
    newG->addNode(virtualExit);
    newG->allNodes(gbegin, gend);
    for (; gbegin != gend; ++gbegin) {
        SliceNode::Ptr cur = boost::static_pointer_cast<SliceNode>(*gbegin);
        if (!cur->hasOutEdges() && cur != virtualExit) {
            newG->insertPair(cur, virtualExit, TypedSliceEdge::create(cur, virtualExit, FALLTHROUGH));
        }
    }
    parsing_printf("\t\t calculate virtual nodes in the new graph\n");

    // Mark entry/exit nodes so later passes can iterate over them.
    AdjustGraphEntryAndExit(newG);


    return newG;

}
186
187
188 bool JumpTablePred::addNodeCallback(AssignmentPtr ap, set<ParseAPI::Edge*> &visitedEdges) {
189     if (!jumpTableFormat) return false;
190     if (unknownInstruction) return false;
191     if (currentAssigns.find(ap) != currentAssigns.end()) return true;
192     if (currentAssigns.size() > 50) return false; 
193     // For flags, we only analyze zf
194     if (ap->out().absloc().type() == Absloc::Register) {
195         MachRegister reg = ap->out().absloc().reg();
196         if (reg.isFlag() && reg != MachRegister::getZeroFlag(reg.getArchitecture())) {
197             return true;
198         }
199     }
200     pair<AST::Ptr, bool> expandRet = ExpandAssignment(ap);
201
202     currentAssigns.insert(ap);
203
204     parsing_printf("Adding assignment %s in instruction %s at %lx, total %d\n", ap->format().c_str(), ap->insn()->format().c_str(), ap->addr(), currentAssigns.size());
205 /*
206     if (ap->insn() && ap->insn()->readsMemory() && firstMemoryRead) {
207         firstMemoryRead = false;
208         parsing_printf("\tThe first memory read, check if format is correct\n");
209         if 
210     }
211 */
212
213     if (!expandRet.second || expandRet.first == NULL) {
214         if (ap && ap->block() && ap->block()->obj()->cs()->getArch() == Arch_aarch64) {
215             unknownInstruction = true;
216         }
217         return true;
218     }
219
220     // If this assignment writes memory,
221     // we only want to analyze it when it writes to 
222     // an AST we have seen before and potentially
223     // can used for aliasing
224     if (ap->insn()->writesMemory()) {
225         if (!MatchReadAST(ap)) return true;
226     }
227
228     // If this assignment reads memory,
229     // we record the AST of the read so
230     // that in the future we can match a
231     // corresponding write to identify aliasing
232     if (ap->insn()->readsMemory() && expandRet.first->getID() == AST::V_RoseAST) {
233         RoseAST::Ptr roseAST = boost::static_pointer_cast<RoseAST>(expandRet.first);
234         if (roseAST->val().op == ROSEOperation::derefOp) {
235             readAST.push_back(expandRet.first);
236         }
237     }
238     // I really do not want to redo the analysis from scratch,
239     // but it seems like with newly added edges and nodes,
240     // it is necessary to redo.
241
242     // We create the CFG based on the found nodes
243     GraphPtr g = BuildAnalysisGraph(visitedEdges);
244     BoundFactsCalculator bfc(func, g, func->entry() == block, rf, thunks, block->last(), false, expandCache);
245     bfc.CalculateBoundedFacts();
246
247     BoundValue target;
248     bool ijt = IsJumpTable(g, bfc, target);
249     if (ijt) {
250         bool ret = !FillInOutEdges(target, outEdges) || outEdges.empty();
251         // Now we have stopped slicing in advance, so the cache contents are not complete any more.
252         if (!ret) setClearCache(true);
253         return ret;
254     } else {
255         return true;
256     }   
257
258
259
260 }
261 bool JumpTablePred::FillInOutEdges(BoundValue &target, 
262                                                  vector<pair< Address, Dyninst::ParseAPI::EdgeTypeEnum > >& outEdges) {
263     set<Address> jumpTargets;                                            
264     outEdges.clear();
265     Address tableBase = target.interval.low;
266     Address tableLastEntry = target.interval.high;
267     int addressWidth = block->obj()->cs()->getAddressWidth();
268     if (addressWidth == 4) {
269         tableBase &= 0xffffffff;
270         tableLastEntry &= 0xffffffff;
271     }
272
273 #if defined(os_windows)
274     tableBase -= block->obj()->cs()->loadAddress();
275     tableLastEntry -= block->obj()->cs()->loadAddress();
276 #endif
277
278     parsing_printf("The final target bound fact:\n");
279     target.Print();
280     if (!block->obj()->cs()->isCode(tableBase) && !block->obj()->cs()->isData(tableBase)) {
281         parsing_printf("\ttableBase 0x%lx invalid, returning false\n", tableBase);
282         jumpTableFormat = false;
283         parsing_printf("Not jump table format!\n");
284         return false;
285     }
286     if (!block->obj()->cs()->isReadOnly(tableBase)) {
287         parsing_printf("\ttableBase 0x%lx not read only, returning false\n", tableBase);
288         jumpTableFormat = false;
289         parsing_printf("Not jump table format!\n");
290         return false;
291     }
292
293
294     for (Address tableEntry = tableBase; tableEntry <= tableLastEntry; tableEntry += target.interval.stride) {
295         if (!block->obj()->cs()->isCode(tableEntry) && !block->obj()->cs()->isData(tableEntry)) continue;
296         if (!block->obj()->cs()->isReadOnly(tableEntry)) continue;
297         int targetAddress = 0;
298         if (target.tableReadSize > 0) {
299             switch (target.tableReadSize) {
300                 case 8:
301                     targetAddress = *(const uint64_t *) block->obj()->cs()->getPtrToInstruction(tableEntry);
302                     break;
303                 case 4:
304                     targetAddress = *(const uint32_t *) block->obj()->cs()->getPtrToInstruction(tableEntry);
305                     if (target.isZeroExtend) break;
306                     if ((addressWidth == 8) && (targetAddress & 0x80000000)) {
307                         targetAddress |= SIGNEX_64_32;
308                     }
309                     break;
310                 case 2:
311                     targetAddress = *(const uint16_t *) block->obj()->cs()->getPtrToInstruction(tableEntry);
312                     if (target.isZeroExtend) break;
313                     if ((addressWidth == 8) && (targetAddress & 0x8000)) {
314                         targetAddress |= SIGNEX_64_16;
315                     }
316                     if ((addressWidth == 4) && (targetAddress & 0x8000)) {
317                         targetAddress |= SIGNEX_32_16;
318                     }
319
320                     break;
321                 case 1:
322                     targetAddress = *(const uint8_t *) block->obj()->cs()->getPtrToInstruction(tableEntry);
323                     if (target.isZeroExtend) break;
324                     if ((addressWidth == 8) && (targetAddress & 0x80)) {
325                         targetAddress |= SIGNEX_64_8;
326                     }
327                     if ((addressWidth == 4) && (targetAddress & 0x80)) {
328                         targetAddress |= SIGNEX_32_8;
329                     }
330
331                     break;
332
333                 default:
334                     parsing_printf("Invalid memory read size %d\n", target.tableReadSize);
335                     return false;
336             }
337             targetAddress *= target.multiply;
338             if (target.targetBase != 0) {
339                 if (target.isSubReadContent) 
340                     targetAddress = target.targetBase - targetAddress;
341                 else 
342                     targetAddress += target.targetBase; 
343
344             }
345 #if defined(os_windows)
346             targetAddress -= block->obj()->cs()->loadAddress();
347 #endif
348         } else targetAddress = tableEntry;
349
350         if (addressWidth == 4) targetAddress &= 0xffffffff;
351         parsing_printf("Jumping to target %lx,", targetAddress);
352         if (block->obj()->cs()->isCode(targetAddress)) {
353             // Jump tables may contain may repetitious entries.
354             // We only want to create one edge for disctinct each jump target.
355             jumpTargets.insert(targetAddress);
356             parsing_printf(" is code.\n" );
357         } else {
358             parsing_printf(" not code.\n");
359         }
360         // If the jump target is resolved to be a constant, 
361         if (target.interval.stride == 0) break;
362     }
363     for (auto tit = jumpTargets.begin(); tit != jumpTargets.end(); ++tit) {
364         outEdges.push_back(make_pair(*tit, INDIRECT));
365     }
366     return true;
367 }
// Decide whether the bound facts computed over the slice graph prove a
// bounded jump target (i.e., a jump table).  On success, "target" is
// filled with the bound and true is returned.  When no bound exists,
// the aliased jump-target expression is inspected; if it does not even
// have jump-table *format*, jumpTableFormat is cleared.
bool JumpTablePred::IsJumpTable(GraphPtr slice, 
                                              BoundFactsCalculator &bfc,
                                              BoundValue &target) {
    NodeIterator exitBegin, exitEnd, srcBegin, srcEnd;
    slice->exitNodes(exitBegin, exitEnd);
    // NOTE(review): assumes the slice has at least one exit node (the
    // virtual exit added by BuildAnalysisGraph) -- *exitBegin is
    // dereferenced without checking exitBegin != exitEnd.
    SliceNode::Ptr virtualExit = boost::static_pointer_cast<SliceNode>(*exitBegin);
    virtualExit->ins(srcBegin, srcEnd);
    // NOTE(review): likewise assumes the virtual exit has at least one
    // predecessor; the first one is taken as the jump node.
    SliceNode::Ptr jumpNode = boost::static_pointer_cast<SliceNode>(*srcBegin);
    
    // The jump target lives in the output location of the jump node's
    // assignment (typically the PC).
    const Absloc &loc = jumpNode->assign()->out().absloc();
    parsing_printf("Checking final bound fact for %s\n",loc.format().c_str()); 
    BoundFact *bf = bfc.GetBoundFactOut(virtualExit);
    VariableAST::Ptr ip = VariableAST::create(Variable(loc));
    BoundValue *tarBoundValue = bf->GetBound(ip);
    if (tarBoundValue != NULL) {
        target = *(tarBoundValue);
        uint64_t s = target.interval.size();
        // A non-empty, plausibly-sized interval is accepted as a table.
        if (s > 0 && s <= MAX_TABLE_ENTRY) return true;
    }
    // No direct bound: check whether the target expression at least
    // looks like a jump-table computation.
    AST::Ptr ipExp = bf->GetAlias(ip);
    if (ipExp) {
        parsing_printf("\t jump target expression %s\n", ipExp->format().c_str());
        JumpTableFormatVisitor jtfv(block);
        ipExp->accept(&jtfv);
        if (!jtfv.format) {
            parsing_printf("\t Not jump table format!\n");
            jumpTableFormat = false;
        }
    }

    return false;
}
400
401 bool JumpTablePred::MatchReadAST(Assignment::Ptr a) {
402     pair<AST::Ptr, bool> expandRet = ExpandAssignment(a);
403     if (!expandRet.second || expandRet.first == NULL) return false;
404     if (a->out().generator() == NULL) return false;
405     AST::Ptr write = SimplifyAnAST(RoseAST::create(ROSEOperation(ROSEOperation::derefOp, a->out().size()), a->out().generator()), 
406                                    PCValue(a->addr(),
407                                            a->insn()->size(),
408                                            a->block()->obj()->cs()->getArch()));
409
410     if (write == NULL) return false;
411     for (auto ait = readAST.begin(); ait != readAST.end(); ++ait) 
412         if (*write == **ait) return true;
413     return false;
414 }
415
416 pair<AST::Ptr, bool> JumpTablePred::ExpandAssignment(Assignment::Ptr assign) {
417     if (expandCache.find(assign) != expandCache.end()) {
418         AST::Ptr ast = expandCache[assign];
419         if (ast) return make_pair(ast, true); else return make_pair(ast, false);
420
421     } else {
422                 parsing_printf("\t\tExpanding instruction @ %x: %s\n", assign->addr(), assign->insn()->format().c_str());
423         pair<AST::Ptr, bool> expandRet = SymEval::expand(assign, false);
424         if (expandRet.second && expandRet.first) {
425 parsing_printf("Original expand: %s\n", expandRet.first->format().c_str());
426
427             AST::Ptr calculation = SimplifyAnAST(expandRet.first, 
428                                                  PCValue(assign->addr(),
429                                                          assign->insn()->size(),
430                                                          assign->block()->obj()->cs()->getArch()));
431             expandCache[assign] = calculation;
432         } else {
433             expandCache[assign] = AST::Ptr();
434         }
435         return make_pair( expandCache[assign], expandRet.second );
436     }
437 }
438