2 #include "CodeObject.h"
3 #include "CodeSource.h"
5 #include "BoundFactCalculator.h"
6 #include "IndirectASTVisitor.h"
7 #include "debug_parse.h"
8 #include "BackwardSlicing.h"
9 #include "Instruction.h"
11 using namespace Dyninst::InstructionAPI;
13 void BoundFactsCalculator::NaturalDFS(Node::Ptr cur) {
15 NodeIterator nbegin, nend;
16 cur->outs(nbegin, nend);
18 for (; nbegin != nend; ++nbegin)
19 if (nodeColor.find(*nbegin) == nodeColor.end())
22 reverseOrder.push_back(cur);
26 void BoundFactsCalculator::ReverseDFS(Node::Ptr cur) {
28 analysisOrder[cur] = orderStamp;
29 NodeIterator nbegin, nend;
30 cur->ins(nbegin, nend);
32 for (; nbegin != nend; ++nbegin)
33 if (nodeColor.find(*nbegin) == nodeColor.end())
37 static void BuildEdgeFromVirtualEntry(SliceNode::Ptr virtualEntry,
38 ParseAPI::Block *curBlock,
39 map<ParseAPI::Block*, vector<SliceNode::Ptr> >&targetMap,
40 set<ParseAPI::Block*> &visit,
42 if (targetMap.find(curBlock) != targetMap.end()) {
43 const vector<SliceNode::Ptr> &targets = targetMap[curBlock];
44 for (auto nit = targets.begin(); nit != targets.end(); ++nit) {
45 SliceNode::Ptr trgNode = *nit;
46 slice->insertPair(virtualEntry, trgNode, TypedSliceEdge::create(virtualEntry, trgNode, FALLTHROUGH));
50 if (visit.find(curBlock) != visit.end()) return;
51 visit.insert(curBlock);
52 for (auto eit = curBlock->targets().begin(); eit != curBlock->targets().end(); ++eit)
53 if ((*eit)->type() != CALL && (*eit)->type() != RET) {
54 BuildEdgeFromVirtualEntry(virtualEntry, (*eit)->trg(), targetMap, visit, slice);
58 void BoundFactsCalculator::DetermineAnalysisOrder() {
59 NodeIterator nbegin, nend;
60 slice->allNodes(nbegin, nend);
64 analysisOrder.clear();
65 for (; nbegin != nend; ++nbegin)
66 if (nodeColor.find(*nbegin) == nodeColor.end()) {
71 for (auto nit = reverseOrder.rbegin(); nit != reverseOrder.rend(); ++nit)
72 if (nodeColor.find(*nit) == nodeColor.end()) {
77 // Create a virtual entry node that has
78 // edges into all entry SCCs
79 SliceNode::Ptr virtualEntry = SliceNode::create(Assignment::Ptr(), func->entry(), func);
80 analysisOrder[virtualEntry] = 0;
81 for (int curOrder = 1; curOrder <= orderStamp; ++curOrder) {
82 // First determine all nodes in this SCC
83 vector<Node::Ptr> curNodes;
84 NodeIterator nbegin, nend;
85 slice->allNodes(nbegin, nend);
86 for (; nbegin != nend; ++nbegin) {
87 if (analysisOrder[*nbegin] == curOrder) {
88 curNodes.push_back(*nbegin);
92 // If this SCC does not have any outside edge,
93 // it is an entry SCC and we need to connect
94 // the virtual entry to it
95 if (!HasIncomingEdgesFromLowerLevel(curOrder, curNodes)) {
96 if (curNodes.size() == 1) {
97 // If the SCC has only one node,
98 // we connect the virtual entry to this single node
99 SliceNode::Ptr node = boost::static_pointer_cast<SliceNode>(*(curNodes.begin()));
100 slice->insertPair(virtualEntry, node, TypedSliceEdge::create(virtualEntry, node, FALLTHROUGH));
102 // If there are more than one node in this SCC,
103 // we do a DFS to see which nodes in the SCC can be
104 // reached from the entry of the function without passing
105 // through other nodes in the SCC.
106 // Basically, we only connect edges from the virtual entry
107 // to the entries of the SCC
108 set<ParseAPI::Block*> visit;
109 map<ParseAPI::Block*, vector<SliceNode::Ptr> >targetMap;
110 for (auto nit = curNodes.begin(); nit != curNodes.end(); ++nit) {
111 SliceNode::Ptr node = boost::static_pointer_cast<SliceNode>(*nit);
112 ParseAPI::Block * b = node->block();
113 Address addr = node->addr();
114 if (targetMap.find(b) == targetMap.end()) {
115 targetMap[b].push_back(node);
116 } else if (targetMap[b][0]->addr() > addr) {
117 targetMap[b].clear();
118 targetMap[b].push_back(node);
119 } else if (targetMap[b][0]->addr() == addr) {
120 targetMap[b].push_back(node);
123 BuildEdgeFromVirtualEntry(virtualEntry, virtualEntry->block(), targetMap, visit, slice);
127 slice->clearEntryNodes();
128 slice->markAsEntryNode(virtualEntry);
131 bool BoundFactsCalculator::HasIncomingEdgesFromLowerLevel(int curOrder, vector<Node::Ptr>& curNodes) {
132 for (auto nit = curNodes.begin(); nit != curNodes.end(); ++nit) {
133 Node::Ptr cur = *nit;
134 NodeIterator nbegin, nend;
135 cur->ins(nbegin, nend);
136 for (; nbegin != nend; ++nbegin)
137 if (analysisOrder[*nbegin] < curOrder) return true;
143 bool BoundFactsCalculator::CalculateBoundedFacts() {
144 /* We use a dataflow analysis to calculate the value bound
145 * of each register and potentially some memory locations.
146 * The key steps of the dataflow analysis are
147 * 1. Determine the analysis order:
148 * First calculate all strongly connected components (SCC)
149 * of the graph. The flow analysis inside a SCC is
150 * iterative. The flow analysis between several SCCs
151 * is done topologically.
152 * 2. For each node, need to calculate the meet and
153 * calculate the transfer function.
154 * 1. The meet should be simply an intersection of all the bounded facts
156 * 2. To calculate the transfer function, we first get the symbolic expression
157 * of the instrution for the node. Then depending on the instruction operation
158 * and the meet result, we know what are still bounded. For example, loading
159 * memory is always unbounded; doing and operation on a register with a constant
160 * makes the register bounded.
163 DetermineAnalysisOrder();
165 if (jumpAddr == 0x4049c1) {
166 slice->printDOT("target_final.dot");
170 queue<Node::Ptr> workingList;
171 unordered_set<Node::Ptr, Node::NodePtrHasher> inQueue;
172 unordered_map<Node::Ptr, int, Node::NodePtrHasher> inQueueLimit;
174 for (int curOrder = 0; curOrder <= orderStamp; ++curOrder) {
175 // We first determine which nodes are
177 vector<Node::Ptr> curNodes;
178 NodeIterator nbegin, nend;
179 slice->allNodes(nbegin, nend);
180 for (; nbegin != nend; ++nbegin) {
181 if (analysisOrder[*nbegin] == curOrder) {
182 curNodes.push_back(*nbegin);
183 workingList.push(*nbegin);
184 inQueue.insert(*nbegin);
188 if (!HasIncomingEdgesFromLowerLevel(curOrder, curNodes)) {
189 // If this SCC is an entry SCC,
190 // we choose a node inside the SCC
191 // and let it be top.
192 // This should only contain the virtual entry node
193 parsing_printf("This SCC does not incoming edges from outside\n");
194 boundFacts[curNodes[0]] = new BoundFact();
196 parsing_printf("Starting analysis inside SCC %d\n", curOrder);
197 // We now start iterative analysis inside the SCC
198 while (!workingList.empty()) {
199 Node::Ptr curNode = workingList.front();
201 inQueue.erase(curNode);
203 SliceNode::Ptr node = boost::static_pointer_cast<SliceNode>(curNode);
204 ++inQueueLimit[curNode];
205 if (inQueueLimit[curNode] > IN_QUEUE_LIMIT) continue;
207 BoundFact* oldFact = GetBoundFact(curNode);
208 parsing_printf("Calculate Meet for %lx", node->addr());
209 if (node->assign()) {
210 parsing_printf(", insn: %s\n", node->assign()->insn()->format().c_str());
213 if (node->block() == NULL)
214 parsing_printf(", the VirtualExit node\n");
216 parsing_printf(", the VirtualEntry node\n");
219 parsing_printf("\tOld fact for %lx:\n", node->addr());
220 if (oldFact == NULL) parsing_printf("\t\t do not exist\n"); else oldFact->Print();
221 BoundFact* newFact = Meet(curNode);
222 parsing_printf("\tNew fact at %lx\n", node->addr());
223 if (newFact != NULL) newFact->Print(); else parsing_printf("\t\tNot calculated\n");
224 if (newFact != NULL && (oldFact == NULL || *oldFact != *newFact)) {
225 parsing_printf("\tFacts change!\n");
226 if (oldFact != NULL) delete oldFact;
227 boundFacts[curNode] = newFact;
228 curNode->outs(nbegin, nend);
229 for (; nbegin != nend; ++nbegin)
230 // We only add node inside current SCC into the working list
231 if (inQueue.find(*nbegin) == inQueue.end() && analysisOrder[*nbegin] == curOrder) {
232 workingList.push(*nbegin);
233 inQueue.insert(*nbegin);
236 if (newFact != NULL) delete newFact;
237 parsing_printf("\tFacts do not change!\n");
245 if (jumpAddr == 0x805351a) {
246 VariableAST::Ptr ecx = VariableAST::create(Variable(AbsRegion(Absloc(x86::ecx))));
247 BoundValue *val = newFact->GetBound(ecx);
249 printf("\tecx is top\n");
252 printf("\tecx: %d[%lx,%lx]\n", val->interval.stride, val->interval.low, val->interval.high);
264 void BoundFactsCalculator::ThunkBound(BoundFact* curFact, Node::Ptr src, Node::Ptr trg) {
266 // This function checks whether any found thunk is between
267 // the src node and the trg node. If there is any, then we have
268 // extra bound information to be added.
269 ParseAPI::Block *srcBlock;
271 if (src == Node::Ptr())
272 srcBlock = func->entry();
274 SliceNode::Ptr srcNode = boost::static_pointer_cast<SliceNode>(src);
275 srcBlock = srcNode->block();
276 srcAddr = srcNode->addr();
279 SliceNode::Ptr trgNode = boost::static_pointer_cast<SliceNode>(trg);
280 ParseAPI::Block *trgBlock = trgNode->block();
281 Address trgAddr = trgNode->addr();
283 for (auto tit = thunks.begin(); tit != thunks.end(); ++tit) {
284 ParseAPI::Block* thunkBlock = tit->second.block;
285 parsing_printf("\t\tCheck srcAddr at %lx, trgAddr at %lx, thunk at %lx\n", srcAddr, trgAddr, tit->first);
286 if (src != Node::Ptr()) {
287 if (srcBlock == thunkBlock) {
288 if (srcAddr > tit->first) continue;
290 if (rf.thunk_ins[thunkBlock].find(srcBlock) == rf.thunk_ins[thunkBlock].end()) continue;
293 if (trgBlock == thunkBlock) {
294 if (trgAddr < tit->first) continue;
296 if (rf.thunk_outs[thunkBlock].find(trgBlock) == rf.thunk_outs[thunkBlock].end()) continue;
299 parsing_printf("\t\t\tfind thunk at %lx between the source and the target. Add fact", tit->first);
300 BoundValue *bv = new BoundValue(tit->second.value);
302 curFact->GenFact(VariableAST::create(Variable(AbsRegion(Absloc(tit->second.reg)))), bv, false);
309 static bool IsConditionalJump(Instruction::Ptr insn) {
310 entryID id = insn->getOperation().getID();
312 if (id == e_jz || id == e_jnz ||
313 id == e_jb || id == e_jnb ||
314 id == e_jbe || id == e_jnbe ||
315 id == e_jb_jnaej_j || id == e_jnb_jae_j ||
316 id == e_jle || id == e_jl ||
317 id == e_jnl || id == e_jnle) return true;
321 BoundFact* BoundFactsCalculator::Meet(Node::Ptr curNode) {
323 SliceNode::Ptr node = boost::static_pointer_cast<SliceNode>(curNode);
325 EdgeIterator gbegin, gend;
326 curNode->ins(gbegin, gend);
327 BoundFact* newFact = NULL;
330 for (; gbegin != gend; ++gbegin) {
331 TypedSliceEdge::Ptr edge = boost::static_pointer_cast<TypedSliceEdge>(*gbegin);
332 SliceNode::Ptr srcNode = boost::static_pointer_cast<SliceNode>(edge->source());
333 BoundFact *prevFact = GetBoundFact(srcNode);
334 if (prevFact == NULL) {
335 parsing_printf("\t\tIncoming node %lx has not been calculated yet, ignore it\n", srcNode->addr());
338 // Otherwise, create a new copy.
339 // We do not want to overwrite the bound fact
340 // of the predecessor
341 prevFact = new BoundFact(*prevFact);
343 parsing_printf("\t\tMeet incoming edge from %lx\n", srcNode->addr());
344 parsing_printf("\t\tThe fact from %lx before applying transfer function\n", srcNode->addr());
346 if (!srcNode->assign()) {
347 parsing_printf("\t\tThe predecessor node is the virtual entry ndoe\n");
349 // If the indirect jump is in the entry block
350 // of the function, we assume that rax is in
351 // range [0,8] for analyzing the movaps table.
352 // NEED TO HAVE A SAFE WAY TO DO THIS!!
353 parsing_printf("\t\tApplying entry block rax assumption!\n");
355 if (func->entry()->obj()->cs()->getAddressWidth() == 8)
356 axAST = VariableAST::create(Variable(AbsRegion(Absloc(x86_64::rax))));
358 // DOES THIS REALLY SHOW UP IN 32-BIT CODE???
359 axAST = VariableAST::create(Variable(AbsRegion(Absloc(x86::eax))));
360 prevFact->GenFact(axAST, new BoundValue(StridedInterval(1,0,8)), false);
362 } else if (srcNode->assign() && srcNode->assign()->out().absloc().type() == Absloc::Register &&
363 (srcNode->assign()->out().absloc().reg() == x86::zf || srcNode->assign()->out().absloc().reg() == x86_64::zf)) {
364 // zf should be only predecessor of this node
365 parsing_printf("\t\tThe predecessor node is zf assignment!\n");
366 prevFact->SetPredicate(srcNode->assign(), ExpandAssignment(srcNode->assign()) );
367 } else if (srcNode->assign() && IsConditionalJump(srcNode->assign()->insn())) {
368 // If the predecessor is a conditional jump,
369 // we can determine bound fact based on the predicate and the edge type
370 parsing_printf("\t\tThe predecessor node is a conditional jump!\n");
371 if (!prevFact->ConditionalJumpBound(srcNode->assign()->insn(), edge->type())) {
372 fprintf(stderr, "From %lx to %lx\n", srcNode->addr(), node->addr());
376 // The predecessor is not a conditional jump,
377 // then we can determine buond fact based on the src assignment
378 parsing_printf("\t\tThe predecessor node is normal node\n");
379 if (srcNode->assign()) parsing_printf("\t\t\tentry id %d\n", srcNode->assign()->insn()->getOperation().getID());
380 CalcTransferFunction(srcNode, prevFact);
382 ThunkBound(prevFact, srcNode, node);
383 parsing_printf("\t\tFact from %lx after applying transfer function\n", srcNode->addr());
386 // For the first incoming dataflow fact,
388 // We can do this because if an a-loc
389 // is missing in the fact map, we assume
394 newFact->Meet(*prevFact);
401 void BoundFactsCalculator::CalcTransferFunction(Node::Ptr curNode, BoundFact *newFact){
403 SliceNode::Ptr node = boost::static_pointer_cast<SliceNode>(curNode);
404 AbsRegion &ar = node->assign()->out();
405 Instruction::Ptr insn = node->assign()->insn();
407 parsing_printf("\t\t\tExpanding assignment %s in instruction at %lx: %s.\n", node->assign()->format().c_str(), node->addr(), insn->format().c_str());
409 pair<AST::Ptr, bool> expandRet = ExpandAssignment(node->assign());
411 if (expandRet.first == NULL) {
412 // If the instruction is outside the set of instrutions we
413 // add instruction semantics. We assume this instruction
414 // kills all bound fact.
415 // parsing_printf("\t\t\t No semantic support for this instruction. Kill all bound fact\n");
416 // newFact->SetToBottom();
417 parsing_printf("\t\t\t No semantic support for this instruction. Assume it does not affect jump target calculation. Ignore it (Treat as identity function)\n");
421 parsing_printf("\t\t\t AST after expanding (without simplify) %s\n", expandRet.first->format().c_str());
422 AST::Ptr calculation = SimplifyAnAST(expandRet.first, insn->size());
423 parsing_printf("\t\t\t AST after expanding %s\n", calculation->format().c_str());
425 BoundCalcVisitor bcv(*newFact, node->block());
426 calculation->accept(&bcv);
428 AST::Ptr outAST = VariableAST::create(Variable(ar));
429 if (bcv.IsResultBounded(calculation)) {
430 parsing_printf("\t\t\tGenerate bound fact for %s\n", ar.absloc().format().c_str());
431 newFact->GenFact(outAST, new BoundValue(*bcv.GetResultBound(calculation)), false);
434 parsing_printf("\t\t\tKill bound fact for %s\n", ar.absloc().format().c_str());
435 newFact->KillFact(outAST, false);
437 if (calculation->getID() == AST::V_VariableAST) {
438 // We only track alising between registers
439 parsing_printf("\t\t\t%s and %s are equal\n", calculation->format().c_str(), outAST->format().c_str());
440 newFact->InsertRelation(calculation, outAST, BoundFact::Equal);
442 newFact->AdjustPredicate(outAST, calculation);
444 // Now try to track all aliasing
445 newFact->TrackAlias(DeepCopyAnAST(calculation), ar);
447 parsing_printf("\t\t\tCalculating transfer function: Output facts\n");
452 BoundFact* BoundFactsCalculator::GetBoundFact(Node::Ptr node) {
453 auto fit = boundFacts.find(node);
454 if (fit == boundFacts.end())
460 BoundFactsCalculator::~BoundFactsCalculator() {
461 for (auto fit = boundFacts.begin(); fit != boundFacts.end(); ++fit)
462 if (fit->second != NULL)
467 pair<AST::Ptr, bool> BoundFactsCalculator::ExpandAssignment(Assignment::Ptr assign) {
468 if (expandCache.find(assign) != expandCache.end()) {
469 AST::Ptr ast = expandCache[assign];
470 if (ast) return make_pair(DeepCopyAnAST(ast), true); else return make_pair(ast, false);
472 pair<AST::Ptr, bool> expandRet = SymEval::expand(assign, false);
473 if (expandRet.second && expandRet.first) {
474 expandCache[assign] = DeepCopyAnAST(expandRet.first);
476 expandCache[assign] = AST::Ptr();