If the destination operand is a memory location, we need to use the generator of...
[dyninst.git] / parseAPI / src / IndirectAnalyzer.C
1 #include "dyntypes.h"
2 #include "IndirectAnalyzer.h"
3 #include "BoundFactCalculator.h"
4 #include "JumpTablePred.h"
5 #include "IA_IAPI.h"
6 #include "debug_parse.h"
7
8 #include "CodeObject.h"
9 #include "Graph.h"
10
11 #include "Instruction.h"
12 #include "InstructionDecoder.h"
13 #include "Register.h"
14 #include "SymEval.h"
15 #define SIGNEX_64_32 0xffffffff00000000LL
16 #define SIGNEX_64_16 0xffffffffffff0000LL
17 #define SIGNEX_64_8  0xffffffffffffff00LL
18 #define SIGNEX_32_16 0xffff0000
19 #define SIGNEX_32_8 0xffffff00
20
21 // Assume the table contain less than this many entries.
22 #define MAX_TABLE_ENTRY 1000000
23 using namespace Dyninst::ParseAPI;
24 using namespace Dyninst::InstructionAPI;
25
26 bool IndirectControlFlowAnalyzer::FillInOutEdges(BoundValue &target, 
27                                                  vector<pair< Address, Dyninst::ParseAPI::EdgeTypeEnum > >& outEdges) {
28
29     Address tableBase = target.interval.low;
30     Address tableLastEntry = target.interval.high;
31     Architecture arch = block->obj()->cs()->getArch();
32     if (arch == Arch_x86) {
33         tableBase &= 0xffffffff;
34         tableLastEntry &= 0xffffffff;
35     }
36
37 #if defined(os_windows)
38     tableBase -= block->obj()->cs()->loadAddress();
39     tableLastEntry -= block->obj()->cs()->loadAddress();
40 #endif
41
42     parsing_printf("The final target bound fact:\n");
43     target.Print();
44 /*
45     if (block->last() == 0x805351a) {
46         printf("Final fact: %d[%lx,%lx]\n", target.interval.stride, target.interval.low, target.interval.high);
47     }
48 */
49     if (!block->obj()->cs()->isValidAddress(tableBase)) {
50         parsing_printf("\ttableBase 0x%lx invalid, returning false\n", tableBase);
51         return false;
52     }
53
54     for (Address tableEntry = tableBase; tableEntry <= tableLastEntry; tableEntry += target.interval.stride) {
55         if (!block->obj()->cs()->isValidAddress(tableEntry)) continue;
56         Address targetAddress = 0;
57         if (target.isTableRead) {
58             // Two assumptions:
59             // 1. Assume the table contents are moved in a sign extended way;
60             // 2. Assume memory access size is the same as the table stride
61             switch (target.interval.stride) {
62                 case 8:
63                     targetAddress = *(const uint64_t *) block->obj()->cs()->getPtrToInstruction(tableEntry);
64                     break;
65                 case 4:
66                     targetAddress = *(const uint32_t *) block->obj()->cs()->getPtrToInstruction(tableEntry);
67                     if ((arch == Arch_x86_64) && (targetAddress & 0x80000000)) {
68                         targetAddress |= SIGNEX_64_32;
69                     }
70                     break;
71                 case 2:
72                     targetAddress = *(const uint16_t *) block->obj()->cs()->getPtrToInstruction(tableEntry);
73                     if ((arch == Arch_x86_64) && (targetAddress & 0x8000)) {
74                         targetAddress |= SIGNEX_64_16;
75                     }
76                     if ((arch == Arch_x86) && (targetAddress & 0x8000)) {
77                         targetAddress |= SIGNEX_32_16;
78                     }
79
80                     break;
81                 case 1:
82                     targetAddress = *(const uint8_t *) block->obj()->cs()->getPtrToInstruction(tableEntry);
83                     if ((arch == Arch_x86_64) && (targetAddress & 0x80)) {
84                         targetAddress |= SIGNEX_64_8;
85                     }
86                     if ((arch == Arch_x86) && (targetAddress & 0x80)) {
87                         targetAddress |= SIGNEX_32_8;
88                     }
89
90                     break;
91
92                 default:
93                     parsing_printf("Invalid table stride %d\n", target.interval.stride);
94                     return false;
95             }
96             if (targetAddress != 0) {
97                 if (target.isSubReadContent) 
98                     targetAddress = target.targetBase - targetAddress;
99                 else 
100                     targetAddress += target.targetBase; 
101
102             }
103 #if defined(os_windows)
104             targetAddress -= block->obj()->cs()->loadAddress();
105 #endif
106         } else targetAddress = tableEntry;
107
108         if (block->obj()->cs()->getArch() == Arch_x86) targetAddress &= 0xffffffff;
109         parsing_printf("Jumping to target %lx,", targetAddress);
110         if (block->obj()->cs()->isCode(targetAddress)) {
111             outEdges.push_back(make_pair(targetAddress, INDIRECT));
112             parsing_printf(" is code.\n" );
113         } else {
114             parsing_printf(" not code.\n");
115         }
116         // If the jump target is resolved to be a constant, 
117         if (target.interval.stride == 0) break;
118     }
119     return true;
120 }
121
122 bool IndirectControlFlowAnalyzer::NewJumpTableAnalysis(std::vector<std::pair< Address, Dyninst::ParseAPI::EdgeTypeEnum > >& outEdges) {
123
124 //    if (block->last() != 0x80a922d) return false;
125 //    parsing_printf("Apply indirect control flow analysis at %lx\n", block->last());
126       fprintf(stderr,"Apply indirect control flow analysis at %lx\n", block->last());
127
128 //    parsing_printf("Calculate backward slice\n");
129
130 //    BackwardSlicer bs(func, block, block->last());
131 //    GraphPtr slice =  bs.CalculateBackwardSlicing();
132
133     parsing_printf("Looking for thunk\n");
134 //  Find all blocks that reach the block containing the indirect jump
135 //  This is a prerequisit for finding thunks
136     GetAllReachableBlock();
137 //  Now we try to find all thunks in this function.
138 //  We pass in the slice because we may need to add new ndoes.
139 //    FindAllThunks(slice);
140 //  Calculates all blocks that can reach
141 //  and be reachable from thunk blocks
142     ReachFact rf(thunks);
143
144     
145     const unsigned char * buf = (const unsigned char*) block->obj()->cs()->getPtrToInstruction(block->last());
146     InstructionDecoder dec(buf, InstructionDecoder::maxInstructionLength, block->obj()->cs()->getArch());
147     Instruction::Ptr insn = dec.decode();
148     AssignmentConverter ac(true, false);
149     vector<Assignment::Ptr> assignments;
150     ac.convert(insn, block->last(), func, block, assignments);
151     Slicer s(assignments[0], block, func);
152     
153     JumpTablePred jtp(func, block, rf, thunks, outEdges);
154     GraphPtr slice = s.backwardSlice(jtp);
155
156     return !outEdges.empty();
157 }                                                      
158
159
160
161
162 // Find all blocks that reach the block containing the indirect jump
163 void IndirectControlFlowAnalyzer::GetAllReachableBlock() {
164     reachable.clear();
165     queue<Block*> q;
166     q.push(block);
167     while (!q.empty()) {
168         ParseAPI::Block *cur = q.front();
169         q.pop();
170         if (reachable.find(cur) != reachable.end()) continue;
171         reachable.insert(cur);
172         for (auto eit = cur->sources().begin(); eit != cur->sources().end(); ++eit)
173             if ((*eit)->intraproc()) 
174                 q.push((*eit)->src());
175     }
176
177 }
178
179 bool IndirectControlFlowAnalyzer::IsJumpTable(GraphPtr slice, 
180                                               BoundFactsCalculator &bfc,
181                                               BoundValue &target) {
182     NodeIterator exitBegin, exitEnd, srcBegin, srcEnd;
183     slice->exitNodes(exitBegin, exitEnd);
184     SliceNode::Ptr virtualExit = boost::static_pointer_cast<SliceNode>(*exitBegin);
185     virtualExit->ins(srcBegin, srcEnd);
186     SliceNode::Ptr jumpNode = boost::static_pointer_cast<SliceNode>(*srcBegin);
187     
188     const Absloc &loc = jumpNode->assign()->out().absloc();
189     parsing_printf("Checking final bound fact for %s\n",loc.format().c_str()); 
190     BoundFact *bf = bfc.GetBoundFactOut(virtualExit);
191     BoundValue *tarBoundValue = bf->GetBound(VariableAST::create(Variable(loc)));
192     if (tarBoundValue != NULL) {
193         target = *(tarBoundValue);
194         uint64_t s = target.interval.size();
195         if (s > 0 && s <= MAX_TABLE_ENTRY) return true;
196     }
197     return false;
198 }
199
200 static Address ThunkAdjustment(Address afterThunk, MachRegister reg, GraphPtr slice, ParseAPI::Block *b) {
201     // After the call to thunk, there is usually
202     // an add insturction like ADD ebx, OFFSET to adjust
203     // the value coming out of thunk.
204     // This add instruction may not be in the slice.
205     // Here assume that if the next instruction after thunk
206     // is to add a constant value to the thunk register,
207     // we then adjust the value.
208     NodeIterator nbegin, nend;
209     slice->allNodes(nbegin, nend);
210     for (; nbegin != nend; ++nbegin) {
211         SliceNode::Ptr cur = boost::static_pointer_cast<SliceNode>(*nbegin);
212         // If the next instruction is already in the slice,
213         // there is no need to adjust
214         if (cur->addr() == afterThunk) return 0;
215     }
216     
217     const unsigned char* buf = (const unsigned char*) (b->obj()->cs()->getPtrToInstruction(afterThunk));
218     InstructionDecoder dec(buf, b->end() - b->start(), b->obj()->cs()->getArch());
219     Instruction::Ptr nextInsn = dec.decode();
220     // It has to be an add
221     if (nextInsn->getOperation().getID() != e_add) return 0;
222     vector<Operand> operands;
223     nextInsn->getOperands(operands);
224     RegisterAST::Ptr regAST = boost::dynamic_pointer_cast<RegisterAST>(operands[0].getValue());
225     // The first operand should be a register
226     if (regAST == 0) return 0;
227     if (regAST->getID() != reg) return 0;
228     Result res = operands[1].getValue()->eval();
229     // A not defined result means that
230     // the second operand is not an immediate
231     if (!res.defined) return 0;
232     return res.convert<Address>();
233 }
234
235 void IndirectControlFlowAnalyzer::FindAllThunks(GraphPtr slice) {
236     // Enumuerate every block to find thunk
237     for (auto bit = reachable.begin(); bit != reachable.end(); ++bit) {
238         // We intentional treat a getting PC call as a special case that does not
239         // end a basic block. So, we need to check every instruction to find all thunks
240         ParseAPI::Block *b = *bit;
241         const unsigned char* buf =
242             (const unsigned char*)(b->obj()->cs()->getPtrToInstruction(b->start()));
243         if( buf == NULL ) {
244             parsing_printf("%s[%d]: failed to get pointer to instruction by offset\n",FILE__, __LINE__);
245             return;
246         }
247         parsing_printf("Looking for thunk in block [%lx,%lx).", b->start(), b->end());
248         InstructionDecoder dec(buf, b->end() - b->start(), b->obj()->cs()->getArch());
249         InsnAdapter::IA_IAPI block(dec, b->start(), b->obj() , b->region(), b->obj()->cs(), b);
250         while (block.getAddr() < b->end()) {
251             if (block.getInstruction()->getCategory() == c_CallInsn && block.isThunk()) {
252                 bool valid;
253                 Address addr;
254                 boost::tie(valid, addr) = block.getCFT();
255                 const unsigned char *target = (const unsigned char *) b->obj()->cs()->getPtrToInstruction(addr);
256                 InstructionDecoder targetChecker(target, InstructionDecoder::maxInstructionLength, b->obj()->cs()->getArch());
257                 Instruction::Ptr thunkFirst = targetChecker.decode();
258                 set<RegisterAST::Ptr> thunkTargetRegs;
259                 thunkFirst->getWriteSet(thunkTargetRegs);
260                 
261                 for (auto curReg = thunkTargetRegs.begin(); curReg != thunkTargetRegs.end(); ++curReg) {
262                     ThunkInfo t;
263                     t.reg = (*curReg)->getID();
264                     t.value = block.getAddr() + block.getInstruction()->size();
265                     t.value += ThunkAdjustment(t.value, t.reg, slice, b);
266                     t.block = b;
267                     thunks.insert(make_pair(block.getAddr(), t));
268                     parsing_printf("\tfind thunk at %lx, storing value %lx to %s\n", block.getAddr(), t.value , t.reg.name().c_str());
269                 }
270             }
271             block.advance();
272         }
273     }
274 }
275
276