1. When we backward slice to a return edge and if the user decide to not follow into...
[dyninst.git] / parseAPI / src / IndirectAnalyzer.C
1 #include "dyntypes.h"
2 #include "IndirectControlFlow.h"
3 #include "BackwardSlicing.h"
4 #include "IA_IAPI.h"
5 #include "debug_parse.h"
6
7 #include "CodeObject.h"
8 #include "Graph.h"
9
10 #include "Instruction.h"
11 #include "InstructionDecoder.h"
12 #include "Register.h"
13
14 #define SIGNEX_64_32 0xffffffff00000000LL
15 #define SIGNEX_64_16 0xffffffffffff0000LL
16 #define SIGNEX_64_8  0xffffffffffffff00LL
17 #define SIGNEX_32_16 0xffff0000
18 #define SIGNEX_32_8 0xffffff00
19
20 bool IndirectControlFlowAnalyzer::FillInOutEdges(BoundValue &target, 
21                                                  vector<pair< Address, Dyninst::ParseAPI::EdgeTypeEnum > >& outEdges) {
22 #if defined(os_windows)
23     target.tableBase -= block->obj()->cs()->loadAddress();
24 #endif
25     Architecture arch = block->obj()->cs()->getArch();
26     if (arch == Arch_x86) target.tableBase &= 0xffffffff;
27     parsing_printf("The final target bound fact:\n");
28     target.Print();
29
30     if (!block->obj()->cs()->isValidAddress(target.tableBase)) {
31         parsing_printf("\ttableBase 0x%lx invalid, returning false\n", target.tableBase);
32         return false;
33     }
34
35     for (int64_t i = 0; i <= target.value; ++i) {
36         Address tableEntry = target.tableBase;
37         if (target.addIndexing) tableEntry += target.coe * i; else tableEntry -= target.coe * i;
38         if (!block->obj()->cs()->isValidAddress(tableEntry)) continue;
39         Address targetAddress = 0;
40         if (target.tableLookup || target.tableOffset) {
41             // Assume the table contents are moved in a sign extended way. Fixme if not.
42             switch (target.coe) {
43                 case 8:
44                     targetAddress = *(const uint64_t *) block->obj()->cs()->getPtrToInstruction(tableEntry);
45                     break;
46                 case 4:
47                     targetAddress = *(const uint32_t *) block->obj()->cs()->getPtrToInstruction(tableEntry);
48                     if ((arch == Arch_x86_64) && (targetAddress & 0x80000000)) {
49                         targetAddress |= SIGNEX_64_32;
50                     }
51                     break;
52                 case 2:
53                     targetAddress = *(const uint16_t *) block->obj()->cs()->getPtrToInstruction(tableEntry);
54                     if ((arch == Arch_x86_64) && (targetAddress & 0x8000)) {
55                         targetAddress |= SIGNEX_64_16;
56                     }
57                     if ((arch == Arch_x86) && (targetAddress & 0x8000)) {
58                         targetAddress |= SIGNEX_32_16;
59                     }
60
61                     break;
62                 case 1:
63                     targetAddress = *(const uint8_t *) block->obj()->cs()->getPtrToInstruction(tableEntry);
64                     if ((arch == Arch_x86_64) && (targetAddress & 0x80)) {
65                         targetAddress |= SIGNEX_64_8;
66                     }
67                     if ((arch == Arch_x86) && (targetAddress & 0x80)) {
68                         targetAddress |= SIGNEX_32_8;
69                     }
70
71                     break;
72
73                 default:
74                     parsing_printf("Invalid table stride %d\n", target.coe);
75                     continue;
76             }
77             if (target.tableOffset && targetAddress != 0) {
78                 if (target.addOffset) targetAddress += target.targetBase; else targetAddress = target.targetBase - targetAddress;
79             }
80 #if defined(os_windows)
81             targetAddress -= block->obj()->cs()->loadAddress();
82 #endif
83         } else targetAddress = tableEntry;
84
85         if (block->obj()->cs()->getArch() == Arch_x86) targetAddress &= 0xffffffff;
86         parsing_printf("Jumping to target %lx,", targetAddress);
87         if (block->obj()->cs()->isCode(targetAddress)) {
88             outEdges.push_back(make_pair(targetAddress, INDIRECT));
89             parsing_printf(" is code.\n" );
90         } else {
91             parsing_printf(" not code.\n");
92         }
93     }
94     return true;
95 }
96
97 bool IndirectControlFlowAnalyzer::NewJumpTableAnalysis(std::vector<std::pair< Address, Dyninst::ParseAPI::EdgeTypeEnum > >& outEdges) {
98
99 //    if (block->last() != 0x4e4ffb) return false;
100
101     parsing_printf("Apply indirect control flow analysis at %lx\n", block->last());
102     FindAllConditionalGuards();
103     FindAllThunks();
104     ReachFact rf(guards, thunks);
105     parsing_printf("Calculate backward slice\n");
106
107     BackwardSlicer bs(func, block, block->last(), guards, rf);
108     GraphPtr slice =  bs.CalculateBackwardSlicing();
109
110     return false;
111
112     parsing_printf("Calculate bound facts\n");     
113     BoundFactsCalculator bfc(guards, func, slice, func->entry() == block, rf, thunks);
114     bfc.CalculateBoundedFacts();
115
116     BoundValue target;
117     bool ijt = IsJumpTable(slice, bfc, target);
118     if (ijt) {
119         return FillInOutEdges(target, outEdges);
120     } else return false;
121 }                                                      
122
123
124
125 bool IndirectControlFlowAnalyzer::EndWithConditionalJump(ParseAPI::Block * b) {
126
127     const unsigned char * buf = (const unsigned char*) b->obj()->cs()->getPtrToInstruction(b->last());
128     InstructionDecoder dec(buf, b->end() - b->last(), b->obj()->cs()->getArch());
129     Instruction::Ptr insn = dec.decode();
130     entryID id = insn->getOperation().getID();
131
132     if (id == e_jz || id == e_jnz ||
133         id == e_jb || id == e_jnb ||
134         id == e_jbe || id == e_jnbe) return true;
135    
136 //    for (auto eit = b->targets().begin(); eit != b->targets().end(); ++eit)
137 //        if ((*eit)->type() == COND_TAKEN) return true;
138     return false;
139
140 }
141
142 void IndirectControlFlowAnalyzer::GetAllReachableBlock() {
143     reachable.clear();
144     queue<Block*> q;
145     q.push(block);
146     while (!q.empty()) {
147         ParseAPI::Block *cur = q.front();
148         q.pop();
149         if (reachable.find(cur) != reachable.end()) continue;
150         reachable.insert(cur);
151         for (auto eit = cur->sources().begin(); eit != cur->sources().end(); ++eit)
152             if ((*eit)->intraproc()) 
153                 q.push((*eit)->src());
154     }
155
156 }
157
158 void IndirectControlFlowAnalyzer::SaveGuardData(ParseAPI::Block *prev) {
159
160     Address curAddr = prev->start();
161     const unsigned char* buf = (const unsigned char*) prev->obj()->cs()->getPtrToInstruction(prev->start());
162     InstructionDecoder dec(buf, prev->end() - prev->start(), prev->obj()->cs()->getArch());
163     Instruction::Ptr insn;
164     vector<pair<Instruction::Ptr, Address> > insns;
165     
166     while ( (insn = dec.decode()) != NULL ) {
167         insns.push_back(make_pair(insn, curAddr));
168         curAddr += insn->size();
169     }
170     
171     for (auto iit = insns.rbegin(); iit != insns.rend(); ++iit) {
172         insn = iit->first;
173         if (insn->getOperation().getID() == e_cmp || insn->getOperation().getID() == e_test) {
174             guards.insert(GuardData(func, prev, insn, insns.rbegin()->first, iit->second, insns.rbegin()->second));
175             parsing_printf("Find guard and cmp pair: cmp %s, addr %lx, cond jump %s, addr %lx\n", insn->format().c_str(), iit->second, insns.rbegin()->first->format().c_str(), insns.rbegin()->second); 
176             break;
177         }    
178     }
179 }
180
181 void IndirectControlFlowAnalyzer::FindAllConditionalGuards(){
182     set<ParseAPI::Block*> visited;
183     queue<Block*> q;
184     q.push(block);
185     GetAllReachableBlock();
186
187     while (!q.empty()) {
188         ParseAPI::Block * cur = q.front();
189         q.pop();
190         if (visited.find(cur) != visited.end()) continue;
191         visited.insert(cur);
192
193         // Since a guard has the condition that one branch must always reach the indirect jump,
194         // if the current block can reach a block that cannot reach the indirect jump, 
195         // then all the sources of the current block is not post-dominated by the indirect jump.
196         bool postDominate = true;
197         for (auto eit = cur->targets().begin(); eit != cur->targets().end(); ++eit) 
198             if ((*eit)->intraproc() && (*eit)->type() != INDIRECT)
199                 if (reachable.find((*eit)->trg()) == reachable.end()) postDominate = false;
200         if (!postDominate) continue;
201
202         for (auto eit = cur->sources().begin(); eit != cur->sources().end(); ++eit)
203             if ((*eit)->intraproc()) {
204                 ParseAPI::Block* prev = (*eit)->src();
205                 if (EndWithConditionalJump(prev)) {                
206                     SaveGuardData(prev);
207                 }
208                 else {
209                     q.push(prev);
210                 }
211             }
212     }
213 }
214
215
216
217 bool IndirectControlFlowAnalyzer::IsJumpTable(GraphPtr slice, 
218                                               BoundFactsCalculator &bfc,
219                                               BoundValue &target) {
220     NodeIterator sbegin, send;
221     slice->exitNodes(sbegin, send);
222     for (; sbegin != send; ++sbegin) {
223         SliceNode::Ptr node = boost::static_pointer_cast<SliceNode>(*sbegin);
224         const Absloc &loc = node->assign()->out().absloc();
225         parsing_printf("Checking bound fact at %lx for %s\n",node->addr(), loc.format().c_str()); 
226         BoundFact *bf = bfc.GetBoundFact(*sbegin);
227         if (bf->IsBounded(loc)) {
228             target = *(bf->GetBound(loc));
229
230             if (target.tableLookup) return true;
231             if (target.tableOffset) return true;
232             if (target.type == LessThan && target.CoeBounded() && target.HasTableBase()) return true;
233         }
234     }
235   
236
237     return false;
238 }
239
240 void IndirectControlFlowAnalyzer::FindAllThunks() {
241     for (auto bit = reachable.begin(); bit != reachable.end(); ++bit) {
242         // We intentional treat a getting PC call as a special case that does not
243         // end a basic block. So, we need to check every instruction to find all thunks
244         ParseAPI::Block *b = *bit;
245         const unsigned char* buf =
246             (const unsigned char*)(b->obj()->cs()->getPtrToInstruction(b->start()));
247         if( buf == NULL ) {
248             parsing_printf("%s[%d]: failed to get pointer to instruction by offset\n",FILE__, __LINE__);
249             return;
250         }
251         parsing_printf("Looking for thunk in block [%lx,%lx).", b->start(), b->end());
252         InstructionDecoder dec(buf, b->end() - b->start(), b->obj()->cs()->getArch());
253         InsnAdapter::IA_IAPI block(dec, b->start(), b->obj() , b->region(), b->obj()->cs(), b);
254         while (block.getAddr() < b->end()) {
255             if (block.getInstruction()->getCategory() == c_CallInsn && block.isThunk()) {
256                 bool valid;
257                 Address addr;
258                 boost::tie(valid, addr) = block.getCFT();
259                 const unsigned char *target = (const unsigned char *) b->obj()->cs()->getPtrToInstruction(addr);
260                 InstructionDecoder targetChecker(target, InstructionDecoder::maxInstructionLength, b->obj()->cs()->getArch());
261                 Instruction::Ptr thunkFirst = targetChecker.decode();
262                 set<RegisterAST::Ptr> thunkTargetRegs;
263                 thunkFirst->getWriteSet(thunkTargetRegs);
264                 
265                 for (auto curReg = thunkTargetRegs.begin(); curReg != thunkTargetRegs.end(); ++curReg) {
266                     ThunkInfo t;
267                     t.reg = (*curReg)->getID();
268                     t.value = block.getAddr() + block.getInstruction()->size();
269                     t.block = b;
270                     thunks.insert(make_pair(block.getAddr(), t));
271
272                     parsing_printf("\tfind thunk at %lx, storing value %lx to %s\n", block.getAddr(), t.value , t.reg.name().c_str());
273                 }
274             }
275             block.advance();
276         }
277     }
278 }
279
280