2 #include "CodeObject.h"
3 #include "CodeSource.h"
5 #include "BoundFactCalculator.h"
6 #include "IndirectASTVisitor.h"
7 #include "debug_parse.h"
8 #include "BackwardSlicing.h"
9 #include "Instruction.h"
11 using namespace Dyninst::InstructionAPI;
13 void BoundFactsCalculator::NaturalDFS(Node::Ptr cur) {
15 NodeIterator nbegin, nend;
16 cur->outs(nbegin, nend);
18 for (; nbegin != nend; ++nbegin)
19 if (nodeColor.find(*nbegin) == nodeColor.end())
22 reverseOrder.push_back(cur);
26 void BoundFactsCalculator::ReverseDFS(Node::Ptr cur) {
28 analysisOrder[cur] = orderStamp;
29 NodeIterator nbegin, nend;
30 cur->ins(nbegin, nend);
32 for (; nbegin != nend; ++nbegin)
33 if (nodeColor.find(*nbegin) == nodeColor.end())
37 static void BuildEdgeFromVirtualEntry(SliceNode::Ptr virtualEntry,
38 ParseAPI::Block *curBlock,
39 map<ParseAPI::Block*, vector<SliceNode::Ptr> >&targetMap,
40 set<ParseAPI::Block*> &visit,
42 if (targetMap.find(curBlock) != targetMap.end()) {
43 const vector<SliceNode::Ptr> &targets = targetMap[curBlock];
44 for (auto nit = targets.begin(); nit != targets.end(); ++nit) {
45 SliceNode::Ptr trgNode = *nit;
46 slice->insertPair(virtualEntry, trgNode, TypedSliceEdge::create(virtualEntry, trgNode, FALLTHROUGH));
50 if (visit.find(curBlock) != visit.end()) return;
51 visit.insert(curBlock);
52 for (auto eit = curBlock->targets().begin(); eit != curBlock->targets().end(); ++eit)
53 if ((*eit)->type() != CALL && (*eit)->type() != RET) {
54 BuildEdgeFromVirtualEntry(virtualEntry, (*eit)->trg(), targetMap, visit, slice);
58 void BoundFactsCalculator::DetermineAnalysisOrder() {
59 NodeIterator nbegin, nend;
60 slice->allNodes(nbegin, nend);
64 analysisOrder.clear();
65 for (; nbegin != nend; ++nbegin)
66 if (nodeColor.find(*nbegin) == nodeColor.end()) {
71 for (auto nit = reverseOrder.rbegin(); nit != reverseOrder.rend(); ++nit)
72 if (nodeColor.find(*nit) == nodeColor.end()) {
77 // Create a virtual entry node that has
78 // edges into all entry SCCs
79 SliceNode::Ptr virtualEntry = SliceNode::create(Assignment::Ptr(), func->entry(), func);
80 analysisOrder[virtualEntry] = 0;
81 for (int curOrder = 1; curOrder <= orderStamp; ++curOrder) {
82 // First determine all nodes in this SCC
83 vector<Node::Ptr> curNodes;
84 NodeIterator nbegin, nend;
85 slice->allNodes(nbegin, nend);
86 for (; nbegin != nend; ++nbegin) {
87 if (analysisOrder[*nbegin] == curOrder) {
88 curNodes.push_back(*nbegin);
92 // If this SCC does not have any outside edge,
93 // it is an entry SCC and we need to connect
94 // the virtual entry to it
95 if (!HasIncomingEdgesFromLowerLevel(curOrder, curNodes)) {
96 if (curNodes.size() == 1) {
97 // If the SCC has only one node,
98 // we connect the virtual entry to this single node
99 SliceNode::Ptr node = boost::static_pointer_cast<SliceNode>(*(curNodes.begin()));
100 slice->insertPair(virtualEntry, node, TypedSliceEdge::create(virtualEntry, node, FALLTHROUGH));
102 // If there are more than one node in this SCC,
103 // we do a DFS to see which nodes in the SCC can be
104 // reached from the entry of the function without passing
105 // through other nodes in the SCC.
106 // Basically, we only connect edges from the virtual entry
107 // to the entries of the SCC
108 set<ParseAPI::Block*> visit;
109 map<ParseAPI::Block*, vector<SliceNode::Ptr> >targetMap;
110 for (auto nit = curNodes.begin(); nit != curNodes.end(); ++nit) {
111 SliceNode::Ptr node = boost::static_pointer_cast<SliceNode>(*nit);
112 ParseAPI::Block * b = node->block();
113 Address addr = node->addr();
114 if (targetMap.find(b) == targetMap.end()) {
115 targetMap[b].push_back(node);
116 } else if (targetMap[b][0]->addr() > addr) {
117 targetMap[b].clear();
118 targetMap[b].push_back(node);
119 } else if (targetMap[b][0]->addr() == addr) {
120 targetMap[b].push_back(node);
123 BuildEdgeFromVirtualEntry(virtualEntry, virtualEntry->block(), targetMap, visit, slice);
127 fprintf(stderr, "%d\n", nodeColor.size());
128 // if (nodeColor.size() > 100) slice->printDOT("slice.dot");
130 slice->clearEntryNodes();
131 slice->markAsEntryNode(virtualEntry);
134 bool BoundFactsCalculator::HasIncomingEdgesFromLowerLevel(int curOrder, vector<Node::Ptr>& curNodes) {
135 for (auto nit = curNodes.begin(); nit != curNodes.end(); ++nit) {
136 Node::Ptr cur = *nit;
137 NodeIterator nbegin, nend;
138 cur->ins(nbegin, nend);
139 for (; nbegin != nend; ++nbegin)
140 if (analysisOrder[*nbegin] < curOrder) return true;
146 bool BoundFactsCalculator::CalculateBoundedFacts() {
147 /* We use a dataflow analysis to calculate the value bound
148 * of each register and potentially some memory locations.
149 * The key steps of the dataflow analysis are
150 * 1. Determine the analysis order:
151 * First calculate all strongly connected components (SCC)
152 * of the graph. The flow analysis inside a SCC is
153 * iterative. The flow analysis between several SCCs
154 * is done topologically.
155 * 2. For each node, need to calculate the meet and
156 * calculate the transfer function.
157 * 1. The meet should be simply an intersection of all the bounded facts
159 * 2. To calculate the transfer function, we first get the symbolic expression
160 * of the instrution for the node. Then depending on the instruction operation
161 * and the meet result, we know what are still bounded. For example, loading
162 * memory is always unbounded; doing and operation on a register with a constant
163 * makes the register bounded.
166 DetermineAnalysisOrder();
168 if (jumpAddr == 0x4049c1) {
169 slice->printDOT("target_final.dot");
173 queue<Node::Ptr> workingList;
174 unordered_set<Node::Ptr, Node::NodePtrHasher> inQueue;
175 unordered_map<Node::Ptr, int, Node::NodePtrHasher> inQueueLimit;
177 for (int curOrder = 0; curOrder <= orderStamp; ++curOrder) {
178 // We first determine which nodes are
180 vector<Node::Ptr> curNodes;
181 NodeIterator nbegin, nend;
182 slice->allNodes(nbegin, nend);
183 for (; nbegin != nend; ++nbegin) {
184 if (analysisOrder[*nbegin] == curOrder) {
185 curNodes.push_back(*nbegin);
186 workingList.push(*nbegin);
187 inQueue.insert(*nbegin);
191 if (!HasIncomingEdgesFromLowerLevel(curOrder, curNodes)) {
192 // If this SCC is an entry SCC,
193 // we choose a node inside the SCC
194 // and let it be top.
195 // This should only contain the virtual entry node
196 parsing_printf("This SCC does not incoming edges from outside\n");
197 boundFactsIn[curNodes[0]] = new BoundFact();
198 boundFactsOut[curNodes[0]] = new BoundFact();
200 parsing_printf("Starting analysis inside SCC %d\n", curOrder);
201 // We now start iterative analysis inside the SCC
202 while (!workingList.empty()) {
203 Node::Ptr curNode = workingList.front();
205 inQueue.erase(curNode);
207 SliceNode::Ptr node = boost::static_pointer_cast<SliceNode>(curNode);
208 ++inQueueLimit[curNode];
209 if (inQueueLimit[curNode] > IN_QUEUE_LIMIT) continue;
211 BoundFact* oldFactIn = GetBoundFactIn(curNode);
212 parsing_printf("Calculate Meet for %lx", node->addr());
213 if (node->assign()) {
214 parsing_printf(", insn: %s\n", node->assign()->insn()->format().c_str());
217 if (node->block() == NULL)
218 parsing_printf(", the VirtualExit node\n");
220 parsing_printf(", the VirtualEntry node\n");
223 parsing_printf("\tOld fact for %lx:\n", node->addr());
224 if (oldFactIn == NULL) parsing_printf("\t\t do not exist\n"); else oldFactIn->Print();
225 BoundFact* newFactIn = Meet(curNode);
226 parsing_printf("\tNew fact at %lx\n", node->addr());
227 if (newFactIn != NULL) newFactIn->Print(); else parsing_printf("\t\tNot calculated\n");
228 if (newFactIn != NULL && (oldFactIn == NULL || *oldFactIn != *newFactIn)) {
229 parsing_printf("\tFacts change!\n");
230 if (oldFactIn != NULL) delete oldFactIn;
231 boundFactsIn[curNode] = newFactIn;
232 BoundFact* newFactOut = new BoundFact(*newFactIn);
233 CalcTransferFunction(curNode, newFactOut);
234 if (boundFactsOut.find(curNode) != boundFactsOut.end() && boundFactsOut[curNode] != NULL)
235 delete boundFactsOut[curNode];
236 boundFactsOut[curNode] = newFactOut;
237 curNode->outs(nbegin, nend);
238 for (; nbegin != nend; ++nbegin)
239 // We only add node inside current SCC into the working list
240 if (inQueue.find(*nbegin) == inQueue.end() && analysisOrder[*nbegin] == curOrder) {
241 workingList.push(*nbegin);
242 inQueue.insert(*nbegin);
245 if (newFactIn != NULL) delete newFactIn;
246 parsing_printf("\tFacts do not change!\n");
254 if (jumpAddr == 0x805351a) {
255 VariableAST::Ptr ecx = VariableAST::create(Variable(AbsRegion(Absloc(x86::ecx))));
256 BoundValue *val = newFact->GetBound(ecx);
258 printf("\tecx is top\n");
261 printf("\tecx: %d[%lx,%lx]\n", val->interval.stride, val->interval.low, val->interval.high);
273 void BoundFactsCalculator::ThunkBound( BoundFact*& curFact, Node::Ptr src, Node::Ptr trg, bool &newCopy) {
275 // This function checks whether any found thunk is between
276 // the src node and the trg node. If there is any, then we have
277 // extra bound information to be added.
278 ParseAPI::Block *srcBlock;
280 if (src == Node::Ptr())
281 srcBlock = func->entry();
283 SliceNode::Ptr srcNode = boost::static_pointer_cast<SliceNode>(src);
284 srcBlock = srcNode->block();
285 srcAddr = srcNode->addr();
288 SliceNode::Ptr trgNode = boost::static_pointer_cast<SliceNode>(trg);
289 ParseAPI::Block *trgBlock = trgNode->block();
290 Address trgAddr = trgNode->addr();
293 for (auto tit = thunks.begin(); tit != thunks.end(); ++tit) {
294 ParseAPI::Block* thunkBlock = tit->second.block;
295 parsing_printf("\t\tCheck srcAddr at %lx, trgAddr at %lx, thunk at %lx\n", srcAddr, trgAddr, tit->first);
296 if (src != Node::Ptr()) {
297 if (srcBlock == thunkBlock) {
298 if (srcAddr > tit->first) continue;
300 if (rf.thunk_ins[thunkBlock].find(srcBlock) == rf.thunk_ins[thunkBlock].end()) continue;
303 if (trgBlock == thunkBlock) {
304 if (trgAddr < tit->first) continue;
306 if (rf.thunk_outs[thunkBlock].find(trgBlock) == rf.thunk_outs[thunkBlock].end()) continue;
309 parsing_printf("\t\t\tfind thunk at %lx between the source and the target. Add fact", tit->first);
310 BoundValue *bv = new BoundValue(tit->second.value);
312 if (first && !newCopy) {
314 curFact = new BoundFact(*curFact);
316 curFact->GenFact(VariableAST::create(Variable(AbsRegion(Absloc(tit->second.reg)))), bv, false);
324 static bool IsConditionalJump(Instruction::Ptr insn) {
325 entryID id = insn->getOperation().getID();
327 if (id == e_jz || id == e_jnz ||
328 id == e_jb || id == e_jnb ||
329 id == e_jbe || id == e_jnbe ||
330 id == e_jb_jnaej_j || id == e_jnb_jae_j ||
331 id == e_jle || id == e_jl ||
332 id == e_jnl || id == e_jnle) return true;
336 BoundFact* BoundFactsCalculator::Meet(Node::Ptr curNode) {
338 SliceNode::Ptr node = boost::static_pointer_cast<SliceNode>(curNode);
340 EdgeIterator gbegin, gend;
341 curNode->ins(gbegin, gend);
342 BoundFact* newFact = NULL;
345 for (; gbegin != gend; ++gbegin) {
346 TypedSliceEdge::Ptr edge = boost::static_pointer_cast<TypedSliceEdge>(*gbegin);
347 SliceNode::Ptr srcNode = boost::static_pointer_cast<SliceNode>(edge->source());
348 BoundFact *prevFact = GetBoundFactOut(srcNode);
349 bool newCopy = false;
350 if (prevFact == NULL) {
351 parsing_printf("\t\tIncoming node %lx has not been calculated yet, ignore it\n", srcNode->addr());
354 // Otherwise, create a new copy.
355 // We do not want to overwrite the bound fact
356 // of the predecessor
357 prevFact = new BoundFact(*prevFact);
359 parsing_printf("\t\tMeet incoming edge from %lx\n", srcNode->addr());
360 parsing_printf("\t\tThe fact from %lx before applying transfer function\n", srcNode->addr());
362 if (!srcNode->assign()) {
363 prevFact = new BoundFact(*prevFact);
365 parsing_printf("\t\tThe predecessor node is the virtual entry ndoe\n");
367 // If the indirect jump is in the entry block
368 // of the function, we assume that rax is in
369 // range [0,8] for analyzing the movaps table.
370 // NEED TO HAVE A SAFE WAY TO DO THIS!!
371 parsing_printf("\t\tApplying entry block rax assumption!\n");
373 if (func->entry()->obj()->cs()->getAddressWidth() == 8)
374 axAST = VariableAST::create(Variable(AbsRegion(Absloc(x86_64::rax))));
376 // DOES THIS REALLY SHOW UP IN 32-BIT CODE???
377 axAST = VariableAST::create(Variable(AbsRegion(Absloc(x86::eax))));
378 prevFact->GenFact(axAST, new BoundValue(StridedInterval(1,0,8)), false);
380 } else if (srcNode->assign() && IsConditionalJump(srcNode->assign()->insn())) {
381 prevFact = new BoundFact(*prevFact);
383 // If the predecessor is a conditional jump,
384 // we can determine bound fact based on the predicate and the edge type
385 parsing_printf("\t\tThe predecessor node is a conditional jump!\n");
386 if (!prevFact->ConditionalJumpBound(srcNode->assign()->insn(), edge->type())) {
387 fprintf(stderr, "From %lx to %lx\n", srcNode->addr(), node->addr());
391 ThunkBound(prevFact, srcNode, node, newCopy);
392 parsing_printf("\t\tFact from %lx after applying transfer function\n", srcNode->addr());
395 // For the first incoming dataflow fact,
397 // We can do this because if an a-loc
398 // is missing in the fact map, we assume
401 if (newCopy) newFact = prevFact; else newFact = new BoundFact(*prevFact);
403 newFact->Meet(*prevFact);
404 if (newCopy) delete prevFact;
410 void BoundFactsCalculator::CalcTransferFunction(Node::Ptr curNode, BoundFact *newFact){
411 SliceNode::Ptr node = boost::static_pointer_cast<SliceNode>(curNode);
412 if (!node->assign()) return;
413 if (node->assign() && node->assign()->out().absloc().type() == Absloc::Register &&
414 (node->assign()->out().absloc().reg() == x86::zf || node->assign()->out().absloc().reg() == x86_64::zf)) {
415 // zf should be only predecessor of this node
416 parsing_printf("\t\tThe predecessor node is zf assignment!\n");
417 newFact->SetPredicate(node->assign(), ExpandAssignment(node->assign()) );
420 // The predecessor is not a conditional jump,
421 // then we can determine buond fact based on the src assignment
422 parsing_printf("\t\tThe predecessor node is normal node\n");
423 parsing_printf("\t\t\tentry id %d\n", node->assign()->insn()->getOperation().getID());
425 AbsRegion &ar = node->assign()->out();
426 Instruction::Ptr insn = node->assign()->insn();
428 parsing_printf("\t\t\tExpanding assignment %s in instruction at %lx: %s.\n", node->assign()->format().c_str(), node->addr(), insn->format().c_str());
430 pair<AST::Ptr, bool> expandRet = ExpandAssignment(node->assign());
432 if (expandRet.first == NULL) {
433 // If the instruction is outside the set of instrutions we
434 // add instruction semantics. We assume this instruction
435 // kills all bound fact.
436 // parsing_printf("\t\t\t No semantic support for this instruction. Kill all bound fact\n");
437 // newFact->SetToBottom();
438 parsing_printf("\t\t\t No semantic support for this instruction. Assume it does not affect jump target calculation. Ignore it (Treat as identity function)\n");
442 AST::Ptr calculation = expandRet.first;
443 BoundCalcVisitor bcv(*newFact, node->block());
444 calculation->accept(&bcv);
446 AST::Ptr outAST = VariableAST::create(Variable(ar));
447 if (bcv.IsResultBounded(calculation)) {
448 parsing_printf("\t\t\tGenerate bound fact for %s\n", ar.absloc().format().c_str());
449 newFact->GenFact(outAST, new BoundValue(*bcv.GetResultBound(calculation)), false);
452 parsing_printf("\t\t\tKill bound fact for %s\n", ar.absloc().format().c_str());
453 newFact->KillFact(outAST, false);
455 if (calculation->getID() == AST::V_VariableAST) {
456 // We only track alising between registers
457 parsing_printf("\t\t\t%s and %s are equal\n", calculation->format().c_str(), outAST->format().c_str());
458 newFact->InsertRelation(calculation, outAST, BoundFact::Equal);
460 newFact->AdjustPredicate(outAST, calculation);
462 // Now try to track all aliasing
463 newFact->TrackAlias(DeepCopyAnAST(calculation), ar);
465 parsing_printf("\t\t\tCalculating transfer function: Output facts\n");
470 BoundFact* BoundFactsCalculator::GetBoundFactIn(Node::Ptr node) {
471 auto fit = boundFactsIn.find(node);
472 if (fit == boundFactsIn.end())
477 BoundFact* BoundFactsCalculator::GetBoundFactOut(Node::Ptr node) {
478 auto fit = boundFactsOut.find(node);
479 if (fit == boundFactsOut.end())
484 BoundFactsCalculator::~BoundFactsCalculator() {
485 for (auto fit = boundFactsIn.begin(); fit != boundFactsIn.end(); ++fit)
486 if (fit->second != NULL)
488 boundFactsIn.clear();
489 for (auto fit = boundFactsOut.begin(); fit != boundFactsOut.end(); ++fit)
490 if (fit->second != NULL)
492 boundFactsOut.clear();
496 pair<AST::Ptr, bool> BoundFactsCalculator::ExpandAssignment(Assignment::Ptr assign) {
497 parsing_printf("expand assignment : %s Instruction: %s\n", assign->format().c_str(), assign->insn()->format().c_str());
498 if (expandCache.find(assign) != expandCache.end()) {
499 AST::Ptr ast = expandCache[assign];
500 // if (ast) return make_pair(DeepCopyAnAST(ast), true); else return make_pair(ast, false);
501 if (ast) return make_pair(ast, true); else return make_pair(ast, false);
504 pair<AST::Ptr, bool> expandRet = SymEval::expand(assign, false);
505 if (expandRet.second && expandRet.first) {
506 AST::Ptr calculation = SimplifyAnAST(expandRet.first, assign->insn()->size());
507 //expandCache[assign] = DeepCopyAnAST(expandRet.first);
508 expandCache[assign] = calculation;
510 expandCache[assign] = AST::Ptr();
512 return make_pair( expandCache[assign], expandRet.second );