Fix the bug of treating a constant as a table base when adding two constants
[dyninst.git] / parseAPI / src / IndirectAnalyzer.C
1 #include "dyntypes.h"
2 #include "IndirectControlFlow.h"
3 #include "BackwardSlicing.h"
4 #include "IA_IAPI.h"
5 #include "debug_parse.h"
6
7 #include "CodeObject.h"
8 #include "Graph.h"
9
10 #include "Instruction.h"
11 #include "InstructionDecoder.h"
12 #include "Register.h"
13
14 #define SIGNEX_64_32 0xffffffff00000000LL
15 #define SIGNEX_64_16 0xffffffffffff0000LL
16 #define SIGNEX_64_8  0xffffffffffffff00LL
17 #define SIGNEX_32_16 0xffff0000
18 #define SIGNEX_32_8 0xffffff00
19
20
21 static bool UsePC(Instruction::Ptr insn) {
22     vector<Operand> operands;
23     insn->getOperands(operands);
24     
25     for(unsigned int i=0; i<operands.size();++i) {
26         Operand & op = operands[i];
27         set<RegisterAST::Ptr> regs;
28         op.getReadSet(regs);
29         for (auto rit = regs.begin(); rit != regs.end(); ++rit)
30             if ((*rit)->getID().isPC()) return true;
31
32     }
33     return false;
34 }
35
36 bool IndirectControlFlowAnalyzer::FillInOutEdges(BoundValue &target, 
37                                                  vector<pair< Address, Dyninst::ParseAPI::EdgeTypeEnum > >& outEdges) {
38 #if defined(os_windows)
39     target.tableBase -= block->obj()->cs()->loadAddress();
40 #endif
41
42     if (block->obj()->cs()->getArch() == Arch_x86) target.tableBase &= 0xffffffff;
43     parsing_printf("The final target bound fact:\n");
44     target.Print();
45
46     if (!block->obj()->cs()->isValidAddress(target.tableBase)) {
47         parsing_printf("\ttableBase 0x%lx invalid, returning false\n", target.tableBase);
48         return false;
49     }
50
51     for (int64_t i = 0; i <= target.value; ++i) {
52         Address tableEntry = target.tableBase;
53         if (target.addIndexing) tableEntry += target.coe * i; else tableEntry -= target.coe * i;
54         if (!block->obj()->cs()->isValidAddress(tableEntry)) continue;
55         Address targetAddress = 0;
56         if (target.tableLookup || target.tableOffset) {
57             // Assume the table contents are moved in a sign extended way. Fixme if not.
58             switch (target.coe) {
59                 case 8:
60                     targetAddress = *(const uint64_t *) block->obj()->cs()->getPtrToInstruction(tableEntry);
61                     break;
62                 case 4:
63                     targetAddress = *(const uint32_t *) block->obj()->cs()->getPtrToInstruction(tableEntry);
64                     if ((sizeof(Address) ==  8) && (targetAddress & 0x80000000)) {
65                         targetAddress |= SIGNEX_64_32;
66                     }
67                     break;
68                 case 2:
69                     targetAddress = *(const uint16_t *) block->obj()->cs()->getPtrToInstruction(tableEntry);
70                     if ((sizeof(Address) ==  8) && (targetAddress & 0x8000)) {
71                         targetAddress |= SIGNEX_64_16;
72                     }
73                     if ((sizeof(Address) ==  4) && (targetAddress & 0x8000)) {
74                         targetAddress |= SIGNEX_32_16;
75                     }
76
77                     break;
78                 case 1:
79                     targetAddress = *(const uint8_t *) block->obj()->cs()->getPtrToInstruction(tableEntry);
80                     if ((sizeof(Address) ==  8) && (targetAddress & 0x80)) {
81                         targetAddress |= SIGNEX_64_8;
82                     }
83                     if ((sizeof(Address) ==  4) && (targetAddress & 0x80)) {
84                         targetAddress |= SIGNEX_32_8;
85                     }
86
87                     break;
88
89                 default:
90                     parsing_printf("Invalid table stride %d\n", target.coe);
91                     continue;
92             }
93             if (target.tableOffset && targetAddress != 0) {
94                 if (target.addOffset) targetAddress += target.targetBase; else targetAddress = target.targetBase - targetAddress;
95             }
96 #if defined(os_windows)
97         targetAddress -= block->obj()->cs()->loadAddress();
98 #endif
99         } else targetAddress = tableEntry;
100
101         if (block->obj()->cs()->getArch() == Arch_x86) targetAddress &= 0xffffffff;
102         parsing_printf("Jumping to target %lx,", targetAddress);
103         if (block->obj()->cs()->isCode(targetAddress)) {
104             outEdges.push_back(make_pair(targetAddress, INDIRECT));
105             parsing_printf(" is code.\n" );
106         } else {
107             parsing_printf(" not code.\n");
108         }
109     }
110     return true;
111 }
112
113 bool IndirectControlFlowAnalyzer::NewJumpTableAnalysis(std::vector<std::pair< Address, Dyninst::ParseAPI::EdgeTypeEnum > >& outEdges) {
114     parsing_printf("Apply indirect control flow analysis at %lx\n", block->last());
115     FindAllConditionalGuards();
116     FindAllThunks();
117     ReachFact rf(guards, thunks);
118     parsing_printf("Calculate backward slice\n");
119
120     BackwardSlicer bs(func, block, block->last(), guards, rf);
121     GraphPtr slice =  bs.CalculateBackwardSlicing();
122     parsing_printf("Calculate bound facts\n");     
123     BoundFactsCalculator bfc(guards, func, slice, func->entry() == block, rf, thunks);
124     bfc.CalculateBoundedFacts();
125
126     BoundValue target;
127     bool ijt = IsJumpTable(slice, bfc, target);
128     if (ijt) {
129         return FillInOutEdges(target, outEdges);
130     } else return false;
131 }                                                      
132
133
134
135 bool IndirectControlFlowAnalyzer::EndWithConditionalJump(ParseAPI::Block * b) {
136
137     const unsigned char * buf = (const unsigned char*) b->obj()->cs()->getPtrToInstruction(b->last());
138     InstructionDecoder dec(buf, b->end() - b->last(), b->obj()->cs()->getArch());
139     Instruction::Ptr insn = dec.decode();
140     entryID id = insn->getOperation().getID();
141
142     if (id == e_jz || id == e_jnz ||
143         id == e_jb || id == e_jnb ||
144         id == e_jbe || id == e_jnbe) return true;
145    
146 //    for (auto eit = b->targets().begin(); eit != b->targets().end(); ++eit)
147 //        if ((*eit)->type() == COND_TAKEN) return true;
148     return false;
149
150 }
151
152 void IndirectControlFlowAnalyzer::GetAllReachableBlock() {
153     reachable.clear();
154     queue<Block*> q;
155     q.push(block);
156     while (!q.empty()) {
157         ParseAPI::Block *cur = q.front();
158         q.pop();
159         if (reachable.find(cur) != reachable.end()) continue;
160         reachable.insert(cur);
161         for (auto eit = cur->sources().begin(); eit != cur->sources().end(); ++eit)
162             if ((*eit)->intraproc()) 
163                 q.push((*eit)->src());
164     }
165
166 }
167
168 void IndirectControlFlowAnalyzer::SaveGuardData(ParseAPI::Block *prev) {
169
170     Address curAddr = prev->start();
171     const unsigned char* buf = (const unsigned char*) prev->obj()->cs()->getPtrToInstruction(prev->start());
172     InstructionDecoder dec(buf, prev->end() - prev->start(), prev->obj()->cs()->getArch());
173     Instruction::Ptr insn;
174     vector<pair<Instruction::Ptr, Address> > insns;
175     
176     while ( (insn = dec.decode()) != NULL ) {
177         insns.push_back(make_pair(insn, curAddr));
178         curAddr += insn->size();
179     }
180     
181     for (auto iit = insns.rbegin(); iit != insns.rend(); ++iit) {
182         insn = iit->first;
183         if (insn->getOperation().getID() == e_cmp || insn->getOperation().getID() == e_test) {
184             guards.insert(GuardData(func, prev, insn, insns.rbegin()->first, iit->second, insns.rbegin()->second));
185             parsing_printf("Find guard and cmp pair: cmp %s, addr %lx, cond jump %s, addr %lx\n", insn->format().c_str(), iit->second, insns.rbegin()->first->format().c_str(), insns.rbegin()->second); 
186             break;
187         }    
188     }
189 }
190
191 void IndirectControlFlowAnalyzer::FindAllConditionalGuards(){
192     set<ParseAPI::Block*> visited;
193     queue<Block*> q;
194     q.push(block);
195     GetAllReachableBlock();
196
197     while (!q.empty()) {
198         ParseAPI::Block * cur = q.front();
199         q.pop();
200         if (visited.find(cur) != visited.end()) continue;
201         visited.insert(cur);
202
203         // Since a guard has the condition that one branch must always reach the indirect jump,
204         // if the current block can reach a block that cannot reach the indirect jump, 
205         // then all the sources of the current block is not post-dominated by the indirect jump.
206         bool postDominate = true;
207         for (auto eit = cur->targets().begin(); eit != cur->targets().end(); ++eit) 
208             if ((*eit)->intraproc() && (*eit)->type() != INDIRECT)
209                 if (reachable.find((*eit)->trg()) == reachable.end()) postDominate = false;
210         if (!postDominate) continue;
211
212         for (auto eit = cur->sources().begin(); eit != cur->sources().end(); ++eit)
213             if ((*eit)->intraproc()) {
214                 ParseAPI::Block* prev = (*eit)->src();
215                 if (EndWithConditionalJump(prev)) {                
216                     SaveGuardData(prev);
217                 }
218                 else {
219                     q.push(prev);
220                 }
221             }
222     }
223 }
224
225
226
227 bool IndirectControlFlowAnalyzer::IsJumpTable(GraphPtr slice, 
228                                               BoundFactsCalculator &bfc,
229                                               BoundValue &target) {
230     NodeIterator sbegin, send;
231     slice->exitNodes(sbegin, send);
232     for (; sbegin != send; ++sbegin) {
233         SliceNode::Ptr node = boost::static_pointer_cast<SliceNode>(*sbegin);
234         const Absloc &loc = node->assign()->out().absloc();
235         parsing_printf("Checking bound fact at %lx for %s\n",node->addr(), loc.format().c_str()); 
236         BoundFact *bf = bfc.GetBoundFact(*sbegin);
237         if (bf->IsBounded(loc)) {
238             target = *(bf->GetBound(loc));
239
240             if (target.tableLookup) return true;
241             if (target.tableOffset) return true;
242             if (target.type == LessThan && target.CoeBounded() && target.HasTableBase()) return true;
243         }
244     }
245   
246
247     return false;
248 }
249
250 void IndirectControlFlowAnalyzer::FindAllThunks() {
251     for (auto bit = reachable.begin(); bit != reachable.end(); ++bit) {
252         // We intentional treat a getting PC call as a special case that does not
253         // end a basic block. So, we need to check every instruction to find all thunks
254         ParseAPI::Block *b = *bit;
255         const unsigned char* buf =
256             (const unsigned char*)(b->obj()->cs()->getPtrToInstruction(b->start()));
257         if( buf == NULL ) {
258             parsing_printf("%s[%d]: failed to get pointer to instruction by offset\n",FILE__, __LINE__);
259             return;
260         }
261         parsing_printf("Looking for thunk in block [%lx,%lx).", b->start(), b->end());
262         InstructionDecoder dec(buf, b->end() - b->start(), b->obj()->cs()->getArch());
263         InsnAdapter::IA_IAPI block(dec, b->start(), b->obj() , b->region(), b->obj()->cs(), b);
264         while (block.getAddr() < b->end()) {
265             if (block.getInstruction()->getCategory() == c_CallInsn && block.isThunk()) {
266                 bool valid;
267                 Address addr;
268                 boost::tie(valid, addr) = block.getCFT();
269                 const unsigned char *target = (const unsigned char *) b->obj()->cs()->getPtrToInstruction(addr);
270                 InstructionDecoder targetChecker(target, InstructionDecoder::maxInstructionLength, b->obj()->cs()->getArch());
271                 Instruction::Ptr thunkFirst = targetChecker.decode();
272                 set<RegisterAST::Ptr> thunkTargetRegs;
273                 thunkFirst->getWriteSet(thunkTargetRegs);
274                 
275                 for (auto curReg = thunkTargetRegs.begin(); curReg != thunkTargetRegs.end(); ++curReg) {
276                     ThunkInfo t;
277                     t.reg = (*curReg)->getID();
278                     t.value = block.getAddr() + block.getInstruction()->size();
279                     t.block = b;
280                     thunks.insert(make_pair(block.getAddr(), t));
281
282                     parsing_printf("\tfind thunk at %lx, storing value %lx to %s\n", block.getAddr(), t.value , t.reg.name().c_str());
283                 }
284             }
285             block.advance();
286         }
287     }
288 }
289
290 bool IndirectControlFlowPred::endAtPoint(AssignmentPtr ap) {
291         if (ap->insn()->writesMemory()) return true;
292         if (UsePC(ap->insn())) return true;
293         if (ap->insn()->getCategory() == c_CallInsn) return true;
294         return false;
295 }
296
297 bool IndirectControlFlowPred::followCall(ParseAPI::Function* , CallStack_t & , AbsRegion ) {
298
299 /*        ParseAPI::Block * b = callee->entry();
300         vector<pair<Instruction::Ptr, Address> > insns;
301
302         const unsigned char * buf = (const unsigned char*) b->obj()->cs()->getPtrToInstruction(b->start());
303         InstructionDecoder dec(buf, b->end() - b->start(), b->obj()->cs()->getArch());  
304
305         Instruction::Ptr insn;
306         Address curAddr = b->start();
307         int num = 0;
308         while ((insn = dec.decode()) != NULL) {
309             insns.push_back(make_pair(insn, curAddr));
310             curAddr += insn->size();
311             ++num;
312         }
313
314         if (num != 2) return false;
315
316         if (insns[1].first->getCategory() != c_ReturnInsn) return false;
317         if (insns[0].first->getOperation().getID() != e_mov) return false;
318         printf("followCall to %lx\n", callee->addr());
319 */
320         return false;
321
322 }