Fix table stride for 2bytes table and handle sign extension table read
[dyninst.git] / parseAPI / src / IndirectAnalyzer.C
1 #include "dyntypes.h"
2 #include "IndirectControlFlow.h"
3 #include "BackwardSlicing.h"
4 #include "IA_IAPI.h"
5 #include "debug_parse.h"
6
7 #include "CodeObject.h"
8 #include "Graph.h"
9
10 #include "Instruction.h"
11 #include "InstructionDecoder.h"
12 #include "Register.h"
13
14 #define SIGNEX_64_32 0xffffffff00000000LL
15 #define SIGNEX_64_16 0xffffffffffff0000LL
16 #define SIGNEX_64_8  0xffffffffffffff00LL
17 #define SIGNEX_32_16 0xffff0000
18 #define SIGNEX_32_8 0xffffff00
19
20
21 static bool UsePC(Instruction::Ptr insn) {
22     vector<Operand> operands;
23     insn->getOperands(operands);
24     
25     for(unsigned int i=0; i<operands.size();++i) {
26         Operand & op = operands[i];
27         set<RegisterAST::Ptr> regs;
28         op.getReadSet(regs);
29         for (auto rit = regs.begin(); rit != regs.end(); ++rit)
30             if ((*rit)->getID().isPC()) return true;
31
32     }
33     return false;
34 }
35
36 bool IndirectControlFlowAnalyzer::FillInOutEdges(BoundValue &target, 
37                                                  vector<pair< Address, Dyninst::ParseAPI::EdgeTypeEnum > >& outEdges) {
38     
39     if (block->obj()->cs()->getArch() == Arch_x86) target.tableBase &= 0xffffffff;
40     parsing_printf("The final target bound fact:\n");
41     target.Print();
42
43     if (!block->obj()->cs()->isValidAddress(target.tableBase)) {
44         parsing_printf("\ttableBase 0x%lx invalid, returning false\n", target.tableBase);
45         return false;
46     }
47
48     for (int64_t i = 0; i <= target.value; ++i) {
49         Address tableEntry = target.tableBase;
50         if (target.addIndexing) tableEntry += target.coe * i; else tableEntry -= target.coe * i;
51         if (!block->obj()->cs()->isValidAddress(tableEntry)) continue;
52         Address targetAddress = 0;
53         if (target.tableLookup || target.tableOffset) {
54             // Assume the table contents are moved in a sign extended way. Fixme if not.
55             switch (target.coe) {
56                 case 8:
57                     targetAddress = *(const uint64_t *) block->obj()->cs()->getPtrToInstruction(tableEntry);
58                     break;
59                 case 4:
60                     targetAddress = *(const uint32_t *) block->obj()->cs()->getPtrToInstruction(tableEntry);
61                     if ((sizeof(Address) ==  8) && (targetAddress & 0x80000000)) {
62                         targetAddress |= SIGNEX_64_32;
63                     }
64                     break;
65                 case 2:
66                     targetAddress = *(const uint16_t *) block->obj()->cs()->getPtrToInstruction(tableEntry);
67                     if ((sizeof(Address) ==  8) && (targetAddress & 0x8000)) {
68                         targetAddress |= SIGNEX_64_16;
69                     }
70                     if ((sizeof(Address) ==  4) && (targetAddress & 0x8000)) {
71                         targetAddress |= SIGNEX_32_16;
72                     }
73
74                     break;
75                 case 1:
76                     targetAddress = *(const uint8_t *) block->obj()->cs()->getPtrToInstruction(tableEntry);
77                     if ((sizeof(Address) ==  8) && (targetAddress & 0x80)) {
78                         targetAddress |= SIGNEX_64_8;
79                     }
80                     if ((sizeof(Address) ==  4) && (targetAddress & 0x80)) {
81                         targetAddress |= SIGNEX_32_8;
82                     }
83
84                     break;
85
86                 default:
87                     parsing_printf("Invalid table stride %d\n", target.coe);
88                     continue;
89             }
90 #if defined(os_windows)
91             targetAddress -= block->obj()->cs()->loadAddress();
92 #endif
93             if (target.tableOffset && targetAddress != 0) {
94                 if (target.addOffset) targetAddress += target.targetBase; else targetAddress = target.targetBase - targetAddress;
95             }
96         } else targetAddress = tableEntry;
97
98         if (block->obj()->cs()->getArch() == Arch_x86) targetAddress &= 0xffffffff;
99         parsing_printf("Jumping to target %lx,", targetAddress);
100         if (block->obj()->cs()->isCode(targetAddress)) {
101             outEdges.push_back(make_pair(targetAddress, INDIRECT));
102             parsing_printf(" is code.\n" );
103         } else {
104             parsing_printf(" not code.\n");
105         }
106     }
107     return true;
108 }
109
110 bool IndirectControlFlowAnalyzer::NewJumpTableAnalysis(std::vector<std::pair< Address, Dyninst::ParseAPI::EdgeTypeEnum > >& outEdges) {
111     parsing_printf("Apply indirect control flow analysis at %lx\n", block->last());
112     FindAllConditionalGuards();
113     FindAllThunks();
114     ReachFact rf(guards, thunks);
115     parsing_printf("Calculate backward slice\n");
116
117     BackwardSlicer bs(func, block, block->last(), guards, rf);
118     GraphPtr slice =  bs.CalculateBackwardSlicing();
119     parsing_printf("Calculate bound facts\n");     
120     BoundFactsCalculator bfc(guards, func, slice, func->entry() == block, rf, thunks);
121     bfc.CalculateBoundedFacts();
122
123     BoundValue target;
124     bool ijt = IsJumpTable(slice, bfc, target);
125     if (ijt) {
126         return FillInOutEdges(target, outEdges);
127     } else return false;
128 }                                                      
129
130
131
132 bool IndirectControlFlowAnalyzer::EndWithConditionalJump(ParseAPI::Block * b) {
133
134     const unsigned char * buf = (const unsigned char*) b->obj()->cs()->getPtrToInstruction(b->last());
135     InstructionDecoder dec(buf, b->end() - b->last(), b->obj()->cs()->getArch());
136     Instruction::Ptr insn = dec.decode();
137     entryID id = insn->getOperation().getID();
138
139     if (id == e_jz || id == e_jnz ||
140         id == e_jb || id == e_jnb ||
141         id == e_jbe || id == e_jnbe) return true;
142    
143 //    for (auto eit = b->targets().begin(); eit != b->targets().end(); ++eit)
144 //        if ((*eit)->type() == COND_TAKEN) return true;
145     return false;
146
147 }
148
149 void IndirectControlFlowAnalyzer::GetAllReachableBlock() {
150     reachable.clear();
151     queue<Block*> q;
152     q.push(block);
153     while (!q.empty()) {
154         ParseAPI::Block *cur = q.front();
155         q.pop();
156         if (reachable.find(cur) != reachable.end()) continue;
157         reachable.insert(cur);
158         for (auto eit = cur->sources().begin(); eit != cur->sources().end(); ++eit)
159             if ((*eit)->intraproc()) 
160                 q.push((*eit)->src());
161     }
162
163 }
164
165 void IndirectControlFlowAnalyzer::SaveGuardData(ParseAPI::Block *prev) {
166
167     Address curAddr = prev->start();
168     const unsigned char* buf = (const unsigned char*) prev->obj()->cs()->getPtrToInstruction(prev->start());
169     InstructionDecoder dec(buf, prev->end() - prev->start(), prev->obj()->cs()->getArch());
170     Instruction::Ptr insn;
171     vector<pair<Instruction::Ptr, Address> > insns;
172     
173     while ( (insn = dec.decode()) != NULL ) {
174         insns.push_back(make_pair(insn, curAddr));
175         curAddr += insn->size();
176     }
177     
178     for (auto iit = insns.rbegin(); iit != insns.rend(); ++iit) {
179         insn = iit->first;
180         if (insn->getOperation().getID() == e_cmp || insn->getOperation().getID() == e_test) {
181             guards.insert(GuardData(func, prev, insn, insns.rbegin()->first, iit->second, insns.rbegin()->second));
182             parsing_printf("Find guard and cmp pair: cmp %s, addr %lx, cond jump %s, addr %lx\n", insn->format().c_str(), iit->second, insns.rbegin()->first->format().c_str(), insns.rbegin()->second); 
183             break;
184         }    
185     }
186 }
187
188 void IndirectControlFlowAnalyzer::FindAllConditionalGuards(){
189     set<ParseAPI::Block*> visited;
190     queue<Block*> q;
191     q.push(block);
192     GetAllReachableBlock();
193
194     while (!q.empty()) {
195         ParseAPI::Block * cur = q.front();
196         q.pop();
197         if (visited.find(cur) != visited.end()) continue;
198         visited.insert(cur);
199
200         // Since a guard has the condition that one branch must always reach the indirect jump,
201         // if the current block can reach a block that cannot reach the indirect jump, 
202         // then all the sources of the current block is not post-dominated by the indirect jump.
203         bool postDominate = true;
204         for (auto eit = cur->targets().begin(); eit != cur->targets().end(); ++eit) 
205             if ((*eit)->intraproc() && (*eit)->type() != INDIRECT)
206                 if (reachable.find((*eit)->trg()) == reachable.end()) postDominate = false;
207         if (!postDominate) continue;
208
209         for (auto eit = cur->sources().begin(); eit != cur->sources().end(); ++eit)
210             if ((*eit)->intraproc()) {
211                 ParseAPI::Block* prev = (*eit)->src();
212                 if (EndWithConditionalJump(prev)) {                
213                     SaveGuardData(prev);
214                 }
215                 else {
216                     q.push(prev);
217                 }
218             }
219     }
220 }
221
222
223
224 bool IndirectControlFlowAnalyzer::IsJumpTable(GraphPtr slice, 
225                                               BoundFactsCalculator &bfc,
226                                               BoundValue &target) {
227     NodeIterator sbegin, send;
228     slice->exitNodes(sbegin, send);
229     for (; sbegin != send; ++sbegin) {
230         SliceNode::Ptr node = boost::static_pointer_cast<SliceNode>(*sbegin);
231         const Absloc &loc = node->assign()->out().absloc();
232         parsing_printf("Checking bound fact at %lx for %s\n",node->addr(), loc.format().c_str()); 
233         BoundFact *bf = bfc.GetBoundFact(*sbegin);
234         if (bf->IsBounded(loc)) {
235             target = *(bf->GetBound(loc));
236
237             if (target.tableLookup) return true;
238             if (target.tableOffset) return true;
239             if (target.type == LessThan && target.CoeBounded() && target.HasTableBase()) return true;
240         }
241     }
242   
243
244     return false;
245 }
246
247 void IndirectControlFlowAnalyzer::FindAllThunks() {
248     for (auto bit = reachable.begin(); bit != reachable.end(); ++bit) {
249         // We intentional treat a getting PC call as a special case that does not
250         // end a basic block. So, we need to check every instruction to find all thunks
251         ParseAPI::Block *b = *bit;
252         const unsigned char* buf =
253             (const unsigned char*)(b->obj()->cs()->getPtrToInstruction(b->start()));
254         if( buf == NULL ) {
255             parsing_printf("%s[%d]: failed to get pointer to instruction by offset\n",FILE__, __LINE__);
256             return;
257         }
258         parsing_printf("Looking for thunk in block [%lx,%lx).", b->start(), b->end());
259         InstructionDecoder dec(buf, b->end() - b->start(), b->obj()->cs()->getArch());
260         InsnAdapter::IA_IAPI block(dec, b->start(), b->obj() , b->region(), b->obj()->cs(), b);
261         while (block.getAddr() < b->end()) {
262             if (block.getInstruction()->getCategory() == c_CallInsn && block.isThunk()) {
263                 bool valid;
264                 Address addr;
265                 boost::tie(valid, addr) = block.getCFT();
266                 const unsigned char *target = (const unsigned char *) b->obj()->cs()->getPtrToInstruction(addr);
267                 InstructionDecoder targetChecker(target, InstructionDecoder::maxInstructionLength, b->obj()->cs()->getArch());
268                 Instruction::Ptr thunkFirst = targetChecker.decode();
269                 set<RegisterAST::Ptr> thunkTargetRegs;
270                 thunkFirst->getWriteSet(thunkTargetRegs);
271                 
272                 for (auto curReg = thunkTargetRegs.begin(); curReg != thunkTargetRegs.end(); ++curReg) {
273                     ThunkInfo t;
274                     t.reg = (*curReg)->getID();
275                     t.value = block.getAddr() + block.getInstruction()->size();
276                     t.block = b;
277                     thunks.insert(make_pair(block.getAddr(), t));
278
279                     parsing_printf("\tfind thunk at %lx, storing value %lx to %s\n", block.getAddr(), t.value , t.reg.name().c_str());
280                 }
281             }
282             block.advance();
283         }
284     }
285 }
286
287 bool IndirectControlFlowPred::endAtPoint(AssignmentPtr ap) {
288         if (ap->insn()->writesMemory()) return true;
289         if (UsePC(ap->insn())) return true;
290         if (ap->insn()->getCategory() == c_CallInsn) return true;
291         return false;
292 }
293
294 bool IndirectControlFlowPred::followCall(ParseAPI::Function* , CallStack_t & , AbsRegion ) {
295
296 /*        ParseAPI::Block * b = callee->entry();
297         vector<pair<Instruction::Ptr, Address> > insns;
298
299         const unsigned char * buf = (const unsigned char*) b->obj()->cs()->getPtrToInstruction(b->start());
300         InstructionDecoder dec(buf, b->end() - b->start(), b->obj()->cs()->getArch());  
301
302         Instruction::Ptr insn;
303         Address curAddr = b->start();
304         int num = 0;
305         while ((insn = dec.decode()) != NULL) {
306             insns.push_back(make_pair(insn, curAddr));
307             curAddr += insn->size();
308             ++num;
309         }
310
311         if (num != 2) return false;
312
313         if (insns[1].first->getCategory() != c_ReturnInsn) return false;
314         if (insns[0].first->getOperation().getID() != e_mov) return false;
315         printf("followCall to %lx\n", callee->addr());
316 */
317         return false;
318
319 }