Handle the case where the thunk adjustment instruction (ADD ebx, offset) is not in...
[dyninst.git] / parseAPI / src / IndirectAnalyzer.C
1 #include "dyntypes.h"
2 #include "IndirectAnalyzer.h"
3 #include "BoundFactCalculator.h"
4 #include "BackwardSlicing.h"
5 #include "IA_IAPI.h"
6 #include "debug_parse.h"
7
8 #include "CodeObject.h"
9 #include "Graph.h"
10
11 #include "Instruction.h"
12 #include "InstructionDecoder.h"
13 #include "Register.h"
14 #include "SymEval.h"
15 #define SIGNEX_64_32 0xffffffff00000000LL
16 #define SIGNEX_64_16 0xffffffffffff0000LL
17 #define SIGNEX_64_8  0xffffffffffffff00LL
18 #define SIGNEX_32_16 0xffff0000
19 #define SIGNEX_32_8 0xffffff00
20
21 // Assume the table contain less than this many entries.
22 #define MAX_TABLE_ENTRY 1000000
23 using namespace Dyninst::ParseAPI;
24 using namespace Dyninst::InstructionAPI;
25
26 bool IndirectControlFlowAnalyzer::FillInOutEdges(BoundValue &target, 
27                                                  vector<pair< Address, Dyninst::ParseAPI::EdgeTypeEnum > >& outEdges) {
28
29     Address tableBase = target.interval.low;
30     Address tableLastEntry = target.interval.high;
31     Architecture arch = block->obj()->cs()->getArch();
32     if (arch == Arch_x86) {
33         tableBase &= 0xffffffff;
34         tableLastEntry &= 0xffffffff;
35     }
36
37 #if defined(os_windows)
38     tableBase -= block->obj()->cs()->loadAddress();
39     tableLastEntry -= block->obj()->cs()->loadAddress();
40 #endif
41
42     parsing_printf("The final target bound fact:\n");
43     target.Print();
44 /*
45     if (block->last() == 0x805351a) {
46         printf("Final fact: %d[%lx,%lx]\n", target.interval.stride, target.interval.low, target.interval.high);
47     }
48 */
49     if (!block->obj()->cs()->isValidAddress(tableBase)) {
50         parsing_printf("\ttableBase 0x%lx invalid, returning false\n", tableBase);
51         return false;
52     }
53
54     for (Address tableEntry = tableBase; tableEntry <= tableLastEntry; tableEntry += target.interval.stride) {
55         if (!block->obj()->cs()->isValidAddress(tableEntry)) continue;
56         Address targetAddress = 0;
57         if (target.isTableRead) {
58             // Two assumptions:
59             // 1. Assume the table contents are moved in a sign extended way;
60             // 2. Assume memory access size is the same as the table stride
61             switch (target.interval.stride) {
62                 case 8:
63                     targetAddress = *(const uint64_t *) block->obj()->cs()->getPtrToInstruction(tableEntry);
64                     break;
65                 case 4:
66                     targetAddress = *(const uint32_t *) block->obj()->cs()->getPtrToInstruction(tableEntry);
67                     if ((arch == Arch_x86_64) && (targetAddress & 0x80000000)) {
68                         targetAddress |= SIGNEX_64_32;
69                     }
70                     break;
71                 case 2:
72                     targetAddress = *(const uint16_t *) block->obj()->cs()->getPtrToInstruction(tableEntry);
73                     if ((arch == Arch_x86_64) && (targetAddress & 0x8000)) {
74                         targetAddress |= SIGNEX_64_16;
75                     }
76                     if ((arch == Arch_x86) && (targetAddress & 0x8000)) {
77                         targetAddress |= SIGNEX_32_16;
78                     }
79
80                     break;
81                 case 1:
82                     targetAddress = *(const uint8_t *) block->obj()->cs()->getPtrToInstruction(tableEntry);
83                     if ((arch == Arch_x86_64) && (targetAddress & 0x80)) {
84                         targetAddress |= SIGNEX_64_8;
85                     }
86                     if ((arch == Arch_x86) && (targetAddress & 0x80)) {
87                         targetAddress |= SIGNEX_32_8;
88                     }
89
90                     break;
91
92                 default:
93                     parsing_printf("Invalid table stride %d\n", target.interval.stride);
94                     return false;
95             }
96             if (targetAddress != 0) {
97                 if (target.isSubReadContent) 
98                     targetAddress = target.targetBase - targetAddress;
99                 else 
100                     targetAddress += target.targetBase; 
101
102             }
103 #if defined(os_windows)
104             targetAddress -= block->obj()->cs()->loadAddress();
105 #endif
106         } else targetAddress = tableEntry;
107
108         if (block->obj()->cs()->getArch() == Arch_x86) targetAddress &= 0xffffffff;
109         parsing_printf("Jumping to target %lx,", targetAddress);
110         if (block->obj()->cs()->isCode(targetAddress)) {
111             outEdges.push_back(make_pair(targetAddress, INDIRECT));
112             parsing_printf(" is code.\n" );
113         } else {
114             parsing_printf(" not code.\n");
115         }
116         // If the jump target is resolved to be a constant, 
117         if (target.interval.stride == 0) break;
118     }
119     return true;
120 }
121
122 bool IndirectControlFlowAnalyzer::NewJumpTableAnalysis(std::vector<std::pair< Address, Dyninst::ParseAPI::EdgeTypeEnum > >& outEdges) {
123
124 //    if (block->last() != 0x804e30b) return false;
125
126     parsing_printf("Apply indirect control flow analysis at %lx\n", block->last());
127
128     parsing_printf("Calculate backward slice\n");
129
130     BackwardSlicer bs(func, block, block->last());
131     GraphPtr slice =  bs.CalculateBackwardSlicing();
132
133     parsing_printf("Looking for thunk\n");
134 //  Find all blocks that reach the block containing the indirect jump
135 //  This is a prerequisit for finding thunks
136     GetAllReachableBlock();
137 //  Now we try to find all thunks in this function.
138 //  We pass in the slice because we may need to add new ndoes.
139     FindAllThunks(slice);
140 //  Calculates all blocks that can reach
141 //  and be reachable from thunk blocks
142     ReachFact rf(thunks);
143
144     parsing_printf("Calculate bound facts\n");     
145     BoundFactsCalculator bfc(func, slice, func->entry() == block, rf, thunks, block->last());
146     bfc.CalculateBoundedFacts();
147
148     BoundValue target;
149     bool ijt = IsJumpTable(slice, bfc, target);
150     if (ijt) {
151         return FillInOutEdges(target, outEdges);
152     } else return false;
153 }                                                      
154
155
156
157
158 // Find all blocks that reach the block containing the indirect jump
159 void IndirectControlFlowAnalyzer::GetAllReachableBlock() {
160     reachable.clear();
161     queue<Block*> q;
162     q.push(block);
163     while (!q.empty()) {
164         ParseAPI::Block *cur = q.front();
165         q.pop();
166         if (reachable.find(cur) != reachable.end()) continue;
167         reachable.insert(cur);
168         for (auto eit = cur->sources().begin(); eit != cur->sources().end(); ++eit)
169             if ((*eit)->intraproc()) 
170                 q.push((*eit)->src());
171     }
172
173 }
174
175 bool IndirectControlFlowAnalyzer::IsJumpTable(GraphPtr slice, 
176                                               BoundFactsCalculator &bfc,
177                                               BoundValue &target) {
178     NodeIterator exitBegin, exitEnd, srcBegin, srcEnd;
179     slice->exitNodes(exitBegin, exitEnd);
180     SliceNode::Ptr virtualExit = boost::static_pointer_cast<SliceNode>(*exitBegin);
181     virtualExit->ins(srcBegin, srcEnd);
182     SliceNode::Ptr jumpNode = boost::static_pointer_cast<SliceNode>(*srcBegin);
183     
184     const Absloc &loc = jumpNode->assign()->out().absloc();
185     parsing_printf("Checking final bound fact for %s\n",loc.format().c_str()); 
186     BoundFact *bf = bfc.GetBoundFact(virtualExit);
187     BoundValue *tarBoundValue = bf->GetBound(VariableAST::create(Variable(loc)));
188     if (tarBoundValue != NULL) {
189         target = *(tarBoundValue);
190         uint64_t s = target.interval.size();
191         if (s > 0 && s <= MAX_TABLE_ENTRY) return true;
192     }
193     return false;
194 }
195
196 static Address ThunkAdjustment(Address afterThunk, MachRegister reg, GraphPtr slice, ParseAPI::Block *b) {
197     // After the call to thunk, there is usually
198     // an add insturction like ADD ebx, OFFSET to adjust
199     // the value coming out of thunk.
200     // This add instruction may not be in the slice.
201     // Here assume that if the next instruction after thunk
202     // is to add a constant value to the thunk register,
203     // we then adjust the value.
204     NodeIterator nbegin, nend;
205     slice->allNodes(nbegin, nend);
206     for (; nbegin != nend; ++nbegin) {
207         SliceNode::Ptr cur = boost::static_pointer_cast<SliceNode>(*nbegin);
208         // If the next instruction is already in the slice,
209         // there is no need to adjust
210         if (cur->addr() == afterThunk) return 0;
211     }
212     
213     const unsigned char* buf = (const unsigned char*) (b->obj()->cs()->getPtrToInstruction(afterThunk));
214     InstructionDecoder dec(buf, b->end() - b->start(), b->obj()->cs()->getArch());
215     Instruction::Ptr nextInsn = dec.decode();
216     // It has to be an add
217     if (nextInsn->getOperation().getID() != e_add) return 0;
218     vector<Operand> operands;
219     nextInsn->getOperands(operands);
220     RegisterAST::Ptr regAST = boost::dynamic_pointer_cast<RegisterAST>(operands[0].getValue());
221     // The first operand should be a register
222     if (regAST == 0) return 0;
223     if (regAST->getID() != reg) return 0;
224     Result res = operands[1].getValue()->eval();
225     // A not defined result means that
226     // the second operand is not an immediate
227     if (!res.defined) return 0;
228     return res.convert<Address>();
229 }
230
231 void IndirectControlFlowAnalyzer::FindAllThunks(GraphPtr slice) {
232     // Enumuerate every block to find thunk
233     for (auto bit = reachable.begin(); bit != reachable.end(); ++bit) {
234         // We intentional treat a getting PC call as a special case that does not
235         // end a basic block. So, we need to check every instruction to find all thunks
236         ParseAPI::Block *b = *bit;
237         const unsigned char* buf =
238             (const unsigned char*)(b->obj()->cs()->getPtrToInstruction(b->start()));
239         if( buf == NULL ) {
240             parsing_printf("%s[%d]: failed to get pointer to instruction by offset\n",FILE__, __LINE__);
241             return;
242         }
243         parsing_printf("Looking for thunk in block [%lx,%lx).", b->start(), b->end());
244         InstructionDecoder dec(buf, b->end() - b->start(), b->obj()->cs()->getArch());
245         InsnAdapter::IA_IAPI block(dec, b->start(), b->obj() , b->region(), b->obj()->cs(), b);
246         while (block.getAddr() < b->end()) {
247             if (block.getInstruction()->getCategory() == c_CallInsn && block.isThunk()) {
248                 bool valid;
249                 Address addr;
250                 boost::tie(valid, addr) = block.getCFT();
251                 const unsigned char *target = (const unsigned char *) b->obj()->cs()->getPtrToInstruction(addr);
252                 InstructionDecoder targetChecker(target, InstructionDecoder::maxInstructionLength, b->obj()->cs()->getArch());
253                 Instruction::Ptr thunkFirst = targetChecker.decode();
254                 set<RegisterAST::Ptr> thunkTargetRegs;
255                 thunkFirst->getWriteSet(thunkTargetRegs);
256                 
257                 for (auto curReg = thunkTargetRegs.begin(); curReg != thunkTargetRegs.end(); ++curReg) {
258                     ThunkInfo t;
259                     t.reg = (*curReg)->getID();
260                     t.value = block.getAddr() + block.getInstruction()->size();
261                     t.value += ThunkAdjustment(t.value, t.reg, slice, b);
262                     t.block = b;
263                     thunks.insert(make_pair(block.getAddr(), t));
264                     parsing_printf("\tfind thunk at %lx, storing value %lx to %s\n", block.getAddr(), t.value , t.reg.name().c_str());
265                 }
266             }
267             block.advance();
268         }
269     }
270 }
271
272