A bunch of fixes for indirect control flow analysis
[dyninst.git] / parseAPI / src / IndirectAnalyzer.C
1 #include "dyntypes.h"
2 #include "IndirectAnalyzer.h"
3 #include "BoundFactCalculator.h"
4 #include "BackwardSlicing.h"
5 #include "IA_IAPI.h"
6 #include "debug_parse.h"
7
8 #include "CodeObject.h"
9 #include "Graph.h"
10
11 #include "Instruction.h"
12 #include "InstructionDecoder.h"
13 #include "Register.h"
14 #include "SymEval.h"
15 #define SIGNEX_64_32 0xffffffff00000000LL
16 #define SIGNEX_64_16 0xffffffffffff0000LL
17 #define SIGNEX_64_8  0xffffffffffffff00LL
18 #define SIGNEX_32_16 0xffff0000
19 #define SIGNEX_32_8 0xffffff00
20
21 // Assume the table contain less than this many entries.
22 #define MAX_TABLE_ENTRY 1000000
23 using namespace Dyninst::ParseAPI;
24 using namespace Dyninst::InstructionAPI;
25
26 bool IndirectControlFlowAnalyzer::FillInOutEdges(BoundValue &target, 
27                                                  vector<pair< Address, Dyninst::ParseAPI::EdgeTypeEnum > >& outEdges) {
28
29     Address tableBase = target.interval.low;
30     Address tableLastEntry = target.interval.high;
31     Architecture arch = block->obj()->cs()->getArch();
32     if (arch == Arch_x86) {
33         tableBase &= 0xffffffff;
34         tableLastEntry &= 0xffffffff;
35     }
36
37 #if defined(os_windows)
38     tableBase -= block->obj()->cs()->loadAddress();
39     tableLastEntry -= block->obj()->cs()->loadAddress();
40 #endif
41
42     parsing_printf("The final target bound fact:\n");
43     target.Print();
44 /*
45     if (block->last() == 0x805351a) {
46         printf("Final fact: %d[%lx,%lx]\n", target.interval.stride, target.interval.low, target.interval.high);
47     }
48 */
49     if (!block->obj()->cs()->isValidAddress(tableBase)) {
50         parsing_printf("\ttableBase 0x%lx invalid, returning false\n", tableBase);
51         return false;
52     }
53
54     for (Address tableEntry = tableBase; tableEntry <= tableLastEntry; tableEntry += target.interval.stride) {
55         if (!block->obj()->cs()->isValidAddress(tableEntry)) continue;
56         Address targetAddress = 0;
57         if (target.isTableRead) {
58             // Two assumptions:
59             // 1. Assume the table contents are moved in a sign extended way;
60             // 2. Assume memory access size is the same as the table stride
61             switch (target.interval.stride) {
62                 case 8:
63                     targetAddress = *(const uint64_t *) block->obj()->cs()->getPtrToInstruction(tableEntry);
64                     break;
65                 case 4:
66                     targetAddress = *(const uint32_t *) block->obj()->cs()->getPtrToInstruction(tableEntry);
67                     if ((arch == Arch_x86_64) && (targetAddress & 0x80000000)) {
68                         targetAddress |= SIGNEX_64_32;
69                     }
70                     break;
71                 case 2:
72                     targetAddress = *(const uint16_t *) block->obj()->cs()->getPtrToInstruction(tableEntry);
73                     if ((arch == Arch_x86_64) && (targetAddress & 0x8000)) {
74                         targetAddress |= SIGNEX_64_16;
75                     }
76                     if ((arch == Arch_x86) && (targetAddress & 0x8000)) {
77                         targetAddress |= SIGNEX_32_16;
78                     }
79
80                     break;
81                 case 1:
82                     targetAddress = *(const uint8_t *) block->obj()->cs()->getPtrToInstruction(tableEntry);
83                     if ((arch == Arch_x86_64) && (targetAddress & 0x80)) {
84                         targetAddress |= SIGNEX_64_8;
85                     }
86                     if ((arch == Arch_x86) && (targetAddress & 0x80)) {
87                         targetAddress |= SIGNEX_32_8;
88                     }
89
90                     break;
91
92                 default:
93                     parsing_printf("Invalid table stride %d\n", target.interval.stride);
94                     return false;
95             }
96             if (targetAddress != 0) {
97                 if (target.isSubReadContent) 
98                     targetAddress = target.targetBase - targetAddress;
99                 else 
100                     targetAddress += target.targetBase; 
101
102             }
103 #if defined(os_windows)
104             targetAddress -= block->obj()->cs()->loadAddress();
105 #endif
106         } else targetAddress = tableEntry;
107
108         if (block->obj()->cs()->getArch() == Arch_x86) targetAddress &= 0xffffffff;
109         parsing_printf("Jumping to target %lx,", targetAddress);
110         if (block->obj()->cs()->isCode(targetAddress)) {
111             outEdges.push_back(make_pair(targetAddress, INDIRECT));
112             parsing_printf(" is code.\n" );
113         } else {
114             parsing_printf(" not code.\n");
115         }
116     }
117     return true;
118 }
119
120 bool IndirectControlFlowAnalyzer::NewJumpTableAnalysis(std::vector<std::pair< Address, Dyninst::ParseAPI::EdgeTypeEnum > >& outEdges) {
121
122 //    if (block->last() != 0x804e30b) return false;
123
124     parsing_printf("Apply indirect control flow analysis at %lx\n", block->last());
125
126 //  Find all blocks that reach the block containing the indirect jump
127 //  This is a prerequisit for finding thunks
128     GetAllReachableBlock();
129 //  Now we try to find all thunks in this function
130     FindAllThunks();
131 //  Calculates all blocks that can reach
132 //  and be reachable from thunk blocks
133     ReachFact rf(thunks);
134     parsing_printf("Calculate backward slice\n");
135
136     BackwardSlicer bs(func, block, block->last());
137     GraphPtr slice =  bs.CalculateBackwardSlicing();
138
139     parsing_printf("Calculate bound facts\n");     
140     BoundFactsCalculator bfc(func, slice, func->entry() == block, rf, thunks, block->last());
141     bfc.CalculateBoundedFacts();
142
143     BoundValue target;
144     bool ijt = IsJumpTable(slice, bfc, target);
145     if (ijt) {
146         return FillInOutEdges(target, outEdges);
147     } else return false;
148 }                                                      
149
150
151
152
153 // Find all blocks that reach the block containing the indirect jump
154 void IndirectControlFlowAnalyzer::GetAllReachableBlock() {
155     reachable.clear();
156     queue<Block*> q;
157     q.push(block);
158     while (!q.empty()) {
159         ParseAPI::Block *cur = q.front();
160         q.pop();
161         if (reachable.find(cur) != reachable.end()) continue;
162         reachable.insert(cur);
163         for (auto eit = cur->sources().begin(); eit != cur->sources().end(); ++eit)
164             if ((*eit)->intraproc()) 
165                 q.push((*eit)->src());
166     }
167
168 }
169
170 bool IndirectControlFlowAnalyzer::IsJumpTable(GraphPtr slice, 
171                                               BoundFactsCalculator &bfc,
172                                               BoundValue &target) {
173     NodeIterator exitBegin, exitEnd, srcBegin, srcEnd;
174     slice->exitNodes(exitBegin, exitEnd);
175     SliceNode::Ptr virtualExit = boost::static_pointer_cast<SliceNode>(*exitBegin);
176     virtualExit->ins(srcBegin, srcEnd);
177     SliceNode::Ptr jumpNode = boost::static_pointer_cast<SliceNode>(*srcBegin);
178     
179     const Absloc &loc = jumpNode->assign()->out().absloc();
180     parsing_printf("Checking final bound fact for %s\n",loc.format().c_str()); 
181     BoundFact *bf = bfc.GetBoundFact(virtualExit);
182     BoundValue *tarBoundValue = bf->GetBound(VariableAST::create(Variable(loc)));
183     if (tarBoundValue != NULL) {
184         target = *(tarBoundValue);
185         uint64_t s = target.interval.size();
186         if (s > 0 && s <= MAX_TABLE_ENTRY) return true;
187     }
188     return false;
189 }
190
191 void IndirectControlFlowAnalyzer::FindAllThunks() {
192     // Enumuerate every block to find thunk
193     for (auto bit = reachable.begin(); bit != reachable.end(); ++bit) {
194         // We intentional treat a getting PC call as a special case that does not
195         // end a basic block. So, we need to check every instruction to find all thunks
196         ParseAPI::Block *b = *bit;
197         const unsigned char* buf =
198             (const unsigned char*)(b->obj()->cs()->getPtrToInstruction(b->start()));
199         if( buf == NULL ) {
200             parsing_printf("%s[%d]: failed to get pointer to instruction by offset\n",FILE__, __LINE__);
201             return;
202         }
203         parsing_printf("Looking for thunk in block [%lx,%lx).", b->start(), b->end());
204         InstructionDecoder dec(buf, b->end() - b->start(), b->obj()->cs()->getArch());
205         InsnAdapter::IA_IAPI block(dec, b->start(), b->obj() , b->region(), b->obj()->cs(), b);
206         while (block.getAddr() < b->end()) {
207             if (block.getInstruction()->getCategory() == c_CallInsn && block.isThunk()) {
208                 bool valid;
209                 Address addr;
210                 boost::tie(valid, addr) = block.getCFT();
211                 const unsigned char *target = (const unsigned char *) b->obj()->cs()->getPtrToInstruction(addr);
212                 InstructionDecoder targetChecker(target, InstructionDecoder::maxInstructionLength, b->obj()->cs()->getArch());
213                 Instruction::Ptr thunkFirst = targetChecker.decode();
214                 set<RegisterAST::Ptr> thunkTargetRegs;
215                 thunkFirst->getWriteSet(thunkTargetRegs);
216                 
217                 for (auto curReg = thunkTargetRegs.begin(); curReg != thunkTargetRegs.end(); ++curReg) {
218                     ThunkInfo t;
219                     t.reg = (*curReg)->getID();
220                     t.value = block.getAddr() + block.getInstruction()->size();
221                     t.block = b;
222                     thunks.insert(make_pair(block.getAddr(), t));
223                     parsing_printf("\tfind thunk at %lx, storing value %lx to %s\n", block.getAddr(), t.value , t.reg.name().c_str());
224                 }
225             }
226             block.advance();
227         }
228     }
229 }
230
231