Fit code handling thunk into new analysis code
[dyninst.git] / parseAPI / src / IndirectAnalyzer.C
1 #include "dyntypes.h"
2 #include "IndirectControlFlow.h"
3 #include "BackwardSlicing.h"
4 #include "IA_IAPI.h"
5 #include "debug_parse.h"
6
7 #include "CodeObject.h"
8 #include "Graph.h"
9
10 #include "Instruction.h"
11 #include "InstructionDecoder.h"
12 #include "Register.h"
13 #include "SymEval.h"
14 #define SIGNEX_64_32 0xffffffff00000000LL
15 #define SIGNEX_64_16 0xffffffffffff0000LL
16 #define SIGNEX_64_8  0xffffffffffffff00LL
17 #define SIGNEX_32_16 0xffff0000
18 #define SIGNEX_32_8 0xffffff00
19
20 // Assume the table contain less than this many entries.
21 #define MAX_TABLE_ENTRY 1000000
22
23 bool IndirectControlFlowAnalyzer::FillInOutEdges(BoundValue &target, 
24                                                  vector<pair< Address, Dyninst::ParseAPI::EdgeTypeEnum > >& outEdges) {
25
26     Address tableBase = target.interval.low;
27     Address tableLastEntry = target.interval.high;
28     Architecture arch = block->obj()->cs()->getArch();
29     if (arch == Arch_x86) {
30         tableBase &= 0xffffffff;
31         tableLastEntry &= 0xffffffff;
32     }
33
34 #if defined(os_windows)
35     tableBase -= block->obj()->cs()->loadAddress();
36     tableLastEntry -= block->obj()->cs()->loadAddress();
37 #endif
38
39     parsing_printf("The final target bound fact:\n");
40     target.Print();
41
42     if (!block->obj()->cs()->isValidAddress(tableBase)) {
43         parsing_printf("\ttableBase 0x%lx invalid, returning false\n", tableBase);
44         return false;
45     }
46
47     for (Address tableEntry = tableBase; tableEntry <= tableLastEntry; tableEntry += target.interval.stride) {
48         if (!block->obj()->cs()->isValidAddress(tableEntry)) continue;
49         Address targetAddress = 0;
50         if (target.isTableRead) {
51             // Two assumptions:
52             // 1. Assume the table contents are moved in a sign extended way;
53             // 2. Assume memory access size is the same as the table stride
54             switch (target.interval.stride) {
55                 case 8:
56                     targetAddress = *(const uint64_t *) block->obj()->cs()->getPtrToInstruction(tableEntry);
57                     break;
58                 case 4:
59                     targetAddress = *(const uint32_t *) block->obj()->cs()->getPtrToInstruction(tableEntry);
60                     if ((arch == Arch_x86_64) && (targetAddress & 0x80000000)) {
61                         targetAddress |= SIGNEX_64_32;
62                     }
63                     break;
64                 case 2:
65                     targetAddress = *(const uint16_t *) block->obj()->cs()->getPtrToInstruction(tableEntry);
66                     if ((arch == Arch_x86_64) && (targetAddress & 0x8000)) {
67                         targetAddress |= SIGNEX_64_16;
68                     }
69                     if ((arch == Arch_x86) && (targetAddress & 0x8000)) {
70                         targetAddress |= SIGNEX_32_16;
71                     }
72
73                     break;
74                 case 1:
75                     targetAddress = *(const uint8_t *) block->obj()->cs()->getPtrToInstruction(tableEntry);
76                     if ((arch == Arch_x86_64) && (targetAddress & 0x80)) {
77                         targetAddress |= SIGNEX_64_8;
78                     }
79                     if ((arch == Arch_x86) && (targetAddress & 0x80)) {
80                         targetAddress |= SIGNEX_32_8;
81                     }
82
83                     break;
84
85                 default:
86                     parsing_printf("Invalid table stride %d\n", target.interval.stride);
87                     return false;
88             }
89             if (targetAddress != 0) {
90                 if (target.isSubReadContent) 
91                     targetAddress = target.targetBase - targetAddress;
92                 else 
93                     targetAddress += target.targetBase; 
94
95             }
96 #if defined(os_windows)
97             targetAddress -= block->obj()->cs()->loadAddress();
98 #endif
99         } else targetAddress = tableEntry;
100
101         if (block->obj()->cs()->getArch() == Arch_x86) targetAddress &= 0xffffffff;
102         parsing_printf("Jumping to target %lx,", targetAddress);
103         if (block->obj()->cs()->isCode(targetAddress)) {
104             outEdges.push_back(make_pair(targetAddress, INDIRECT));
105             parsing_printf(" is code.\n" );
106         } else {
107             parsing_printf(" not code.\n");
108         }
109     }
110     return true;
111 }
112
113 bool IndirectControlFlowAnalyzer::NewJumpTableAnalysis(std::vector<std::pair< Address, Dyninst::ParseAPI::EdgeTypeEnum > >& outEdges) {
114
115 //    if (block->last() != 0x403cf0) return false;
116
117     parsing_printf("Apply indirect control flow analysis at %lx\n", block->last());
118
119 //  Find all blocks that reach the block containing the indirect jump
120 //  This is a prerequisit for finding thunks
121     GetAllReachableBlock();
122 //  Now we try to find all thunks in this function
123     FindAllThunks();
124 //  Calculates all blocks that can reach
125 //  and be reachable from thunk blocks
126     ReachFact rf(thunks);
127     parsing_printf("Calculate backward slice\n");
128
129     BackwardSlicer bs(func, block, block->last());
130     GraphPtr slice =  bs.CalculateBackwardSlicing();
131
132     parsing_printf("Calculate bound facts\n");     
133     BoundFactsCalculator bfc(func, slice, func->entry() == block, rf, thunks);
134     bfc.CalculateBoundedFacts();
135
136     BoundValue target;
137     bool ijt = IsJumpTable(slice, bfc, target);
138     if (ijt) {
139         return FillInOutEdges(target, outEdges);
140     } else return false;
141 }                                                      
142
143
144
145
146 // Find all blocks that reach the block containing the indirect jump
147 void IndirectControlFlowAnalyzer::GetAllReachableBlock() {
148     reachable.clear();
149     queue<Block*> q;
150     q.push(block);
151     while (!q.empty()) {
152         ParseAPI::Block *cur = q.front();
153         q.pop();
154         if (reachable.find(cur) != reachable.end()) continue;
155         reachable.insert(cur);
156         for (auto eit = cur->sources().begin(); eit != cur->sources().end(); ++eit)
157             if ((*eit)->intraproc()) 
158                 q.push((*eit)->src());
159     }
160
161 }
162
163 bool IndirectControlFlowAnalyzer::IsJumpTable(GraphPtr slice, 
164                                               BoundFactsCalculator &bfc,
165                                               BoundValue &target) {
166     NodeIterator exitBegin, exitEnd, srcBegin, srcEnd;
167     slice->exitNodes(exitBegin, exitEnd);
168     SliceNode::Ptr virtualExit = boost::static_pointer_cast<SliceNode>(*exitBegin);
169     virtualExit->ins(srcBegin, srcEnd);
170     SliceNode::Ptr jumpNode = boost::static_pointer_cast<SliceNode>(*srcBegin);
171     
172     const Absloc &loc = jumpNode->assign()->out().absloc();
173     parsing_printf("Checking final bound fact for %s\n",loc.format().c_str()); 
174     BoundFact *bf = bfc.GetBoundFact(virtualExit);
175     BoundValue *tarBoundValue = bf->GetBound(VariableAST::create(Variable(loc)));
176     if (tarBoundValue != NULL) {
177         target = *(tarBoundValue);
178         uint64_t s = target.interval.size();
179         if (s > 0 && s <= MAX_TABLE_ENTRY) return true;
180     }
181     return false;
182 }
183
184 void IndirectControlFlowAnalyzer::FindAllThunks() {
185     // Enumuerate every block to find thunk
186     for (auto bit = reachable.begin(); bit != reachable.end(); ++bit) {
187         // We intentional treat a getting PC call as a special case that does not
188         // end a basic block. So, we need to check every instruction to find all thunks
189         ParseAPI::Block *b = *bit;
190         const unsigned char* buf =
191             (const unsigned char*)(b->obj()->cs()->getPtrToInstruction(b->start()));
192         if( buf == NULL ) {
193             parsing_printf("%s[%d]: failed to get pointer to instruction by offset\n",FILE__, __LINE__);
194             return;
195         }
196         parsing_printf("Looking for thunk in block [%lx,%lx).", b->start(), b->end());
197         InstructionDecoder dec(buf, b->end() - b->start(), b->obj()->cs()->getArch());
198         InsnAdapter::IA_IAPI block(dec, b->start(), b->obj() , b->region(), b->obj()->cs(), b);
199         while (block.getAddr() < b->end()) {
200             if (block.getInstruction()->getCategory() == c_CallInsn && block.isThunk()) {
201                 bool valid;
202                 Address addr;
203                 boost::tie(valid, addr) = block.getCFT();
204                 const unsigned char *target = (const unsigned char *) b->obj()->cs()->getPtrToInstruction(addr);
205                 InstructionDecoder targetChecker(target, InstructionDecoder::maxInstructionLength, b->obj()->cs()->getArch());
206                 Instruction::Ptr thunkFirst = targetChecker.decode();
207                 set<RegisterAST::Ptr> thunkTargetRegs;
208                 thunkFirst->getWriteSet(thunkTargetRegs);
209                 
210                 for (auto curReg = thunkTargetRegs.begin(); curReg != thunkTargetRegs.end(); ++curReg) {
211                     ThunkInfo t;
212                     t.reg = (*curReg)->getID();
213                     t.value = block.getAddr() + block.getInstruction()->size();
214                     t.block = b;
215                     thunks.insert(make_pair(block.getAddr(), t));
216                     parsing_printf("\tfind thunk at %lx, storing value %lx to %s\n", block.getAddr(), t.value , t.reg.name().c_str());
217                 }
218             }
219             block.advance();
220         }
221     }
222 }
223
224