Add check for stack accesses with unknown offsets.
[dyninst.git] / dyninstAPI / src / StackMod / StackAccess.C
1 /*
2  * See the dyninst/COPYRIGHT file for copyright information.
3  * 
4  * We provide the Paradyn Tools (below described as "Paradyn")
5  * on an AS IS basis, and do not warrant its validity or performance.
6  * We reserve the right to update, modify, or discontinue this
7  * software at any time.  We shall have no obligation to supply such
8  * updates or modifications or any other form of support to you.
9  * 
10  * By your use of Paradyn, you understand and agree that we (or any
11  * other person or entity with proprietary rights in Paradyn) are
12  * under no obligation to provide either maintenance services,
13  * update services, notices of latent defects, or correction of
14  * defects for Paradyn.
15  * 
16  * This library is free software; you can redistribute it and/or
17  * modify it under the terms of the GNU Lesser General Public
18  * License as published by the Free Software Foundation; either
19  * version 2.1 of the License, or (at your option) any later version.
20  * 
21  * This library is distributed in the hope that it will be useful,
22  * but WITHOUT ANY WARRANTY; without even the implied warranty of
23  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
24  * Lesser General Public License for more details.
25  * 
26  * You should have received a copy of the GNU Lesser General Public
27  * License along with this library; if not, write to the Free Software
28  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
29  */
30
31 #include <sstream>
32
33 #include "debug.h"
34
35 #include "Instruction.h"
36 #include "InstructionDecoder.h"
37 #include "Expression.h"
38 #include "Register.h"
39 #include "Result.h"
40 #include "Dereference.h"
41 #include "Immediate.h"
42 #include "BinaryFunction.h"
43
44 #include "CFG.h"
45
46 #include "slicing.h"
47 #include "SymEval.h"
48
49 #include "StackAccess.h"
50
51 using namespace std;
52 using namespace Dyninst;
53
54 std::string StackAccess::printStackAccessType(StackAccess::StackAccessType t)
55 {
56     switch(t) {
57         case(StackAccess::READ):
58             return "READ";
59         case(StackAccess::WRITE):
60             return "WRITE";
61         case(StackAccess::SAVED):
62             return "SAVED";
63         case(StackAccess::READWRITE):
64             return "READWRITE";
65         case(StackAccess::REGHEIGHT):
66             return "REGHEIGHT";
67         case(StackAccess::DEBUGINFO_LOCAL):
68             return "DEBUGINFO_LOCAL";
69         case(StackAccess::DEBUGINFO_PARAM):
70             return "DEBUGINFO_PARAM";
71         case(StackAccess::UNKNOWN):
72             return "UNKNOWN";
73         case (StackAccess::MISUNDERSTOOD):
74             return "MISUNDERSTOOD";
75         default:
76             return "NOT RECOGNIZED ACCESS TYPE";
77     }
78 }
79
80 std::string StackAccess::format()
81 {
82     std::stringstream ret;
83     ret << "Access to " << _readHeight.height()
84         << " from " << _reg.name()
85         << " (at " << _regHeight.height() << ")"
86         << ", insn disp = " << _disp
87         << ", is " << printStackAccessType(_type);
88     return ret.str();
89 }
90
91 bool isDebugType(StackAccess::StackAccessType t)
92 {
93     return (t==StackAccess::DEBUGINFO_LOCAL ||
94             t==StackAccess::DEBUGINFO_PARAM);
95 }
96
97 int getAccessSize(InstructionAPI::Instruction::Ptr insn)
98 {
99     std::vector<InstructionAPI::Operand> operands;
100     insn->getOperands(operands);
101     int accessSize = 0;
102     for (unsigned i = 0; i < operands.size(); i++) {
103         InstructionAPI::Expression::Ptr value = operands[i].getValue();
104
105         if (accessSize == 0) {
106             accessSize = value->size();
107         } else {
108             accessSize = min(accessSize, value->size());
109         }
110     }
111
112     return accessSize;
113 }
114
115 // FreeBSD is missing a MINLONG and MAXLONG
116 #if defined(os_freebsd)
117 #if defined(arch_64bit)
118 #define MINLONG INT64_MIN
119 #define MAXLONG INT64_MAX
120 #else
121 #define MINLONG INT32_MIN
122 #define MAXLONG INT32_MAX
123 #endif
124 #endif
125 class detectToppedLoc : public InstructionAPI::Visitor {
126 private:
127     typedef std::vector<std::pair<Absloc, StackAnalysis::Height>> Heights;
128     bool defined;
129     bool containsToppedReg;
130     Heights heights;
131
132     std::deque<long> results;  // Stack for calculations
133
134     // Values used for calculations in results
135     static const long top = MAXLONG;
136     static const long bottom = MINLONG;
137     static const long determinable = 0;
138
139 public:
140     detectToppedLoc(Heights &h) : defined(true), containsToppedReg(false),
141         heights(h) {}
142
143     bool isDefined() {
144         return defined && results.size() == 1;
145     }
146
147     bool isBottomed() {
148         assert(isDefined());
149         return containsToppedReg && results.back() == bottom;
150     }
151
152     bool isTopped() {
153         assert(isDefined());
154         return containsToppedReg && results.back() != bottom;
155     }
156
157     virtual void visit(InstructionAPI::BinaryFunction *bf) {
158         if (!defined) return;
159
160         long arg1 = results.back();
161         results.pop_back();
162         long arg2 = results.back();
163         results.pop_back();
164
165         if (bf->isAdd()) {
166             if (arg1 == bottom || arg2 == bottom) {
167                 results.push_back((long) bottom);
168             } else {
169                 results.push_back((long) top);
170             }
171         } else if (bf->isMultiply()) {
172             if (arg1 == bottom) {
173                 if (arg2 != top && arg2 != bottom && arg2 != 1) {
174                     results.push_back((long) top);
175                 } else {
176                     results.push_back((long) bottom);
177                 }
178             } else if (arg2 == bottom) {
179                 if (arg1 != top && arg1 != bottom && arg1 != 1) {
180                     results.push_back((long) top);
181                 } else {
182                     results.push_back((long) bottom);
183                 }
184             } else {
185                 results.push_back((long) top);
186             }
187         } else {
188             defined = false;
189         }
190     }
191
192     virtual void visit(InstructionAPI::Immediate *imm) {
193         if (!defined) return;
194
195         results.push_back(imm->eval().convert<long>());
196     }
197
198     virtual void visit(InstructionAPI::RegisterAST *r) {
199         if (!defined) return;
200
201         MachRegister reg = r->getID();
202         if (reg == x86::eip || reg == x86_64::eip || reg == x86_64::rip) {
203             results.push_back((long) determinable);
204             return;
205         }
206
207         Absloc regLoc(reg);
208         for (auto iter = heights.begin(); iter != heights.end(); iter++) {
209             if (regLoc == iter->first) {
210                 if (iter->second.isTop()) {
211                     containsToppedReg = true;
212                     results.push_back((long) top);
213                 } else {
214                     results.push_back((long) bottom);
215                 }
216                 return;
217             }
218         }
219         // If reg isn't in heights, its height is TOP
220         containsToppedReg = true;
221         results.push_back((long) top);
222     }
223
224     virtual void visit(InstructionAPI::Dereference *) {
225         defined = false;
226     }
227 };
228
229 bool getAccesses(ParseAPI::Function* func,
230         ParseAPI::Block* block,
231         Address addr,
232         InstructionAPI::Instruction::Ptr insn,
233         Accesses*& accesses)
234 {
235     bool ret = true;
236
237     stackmods_printf("\t\t getAccesses %s, 0x%lx @ 0x%lx: %s\n",
238         func->name().c_str(), block->start(), addr, insn->format().c_str());
239
240     Architecture arch = insn->getArch();
241     std::set<InstructionAPI::RegisterAST::Ptr> readRegs;
242     insn->getReadSet(readRegs);
243     StackAnalysis sa(func);
244     std::vector<std::pair<Absloc, StackAnalysis::Height> > heights;
245     sa.findDefinedHeights(block, addr, heights);
246
247     if (insn->getOperation().getID() == e_ret_far ||
248         insn->getOperation().getID() == e_ret_near) {
249         return true;
250     }
251
252     for (auto iter = heights.begin(); iter != heights.end(); ++iter) {
253         // Only consider registers, not tracked memory locations
254         if (iter->first.type() != Absloc::Register) continue;
255         MachRegister curReg = iter->first.reg();
256
257         unsigned int gpr;
258         if (arch == Arch_x86) {
259             gpr = x86::GPR;
260         } else if (arch == Arch_x86_64) {
261             gpr = x86_64::GPR;
262         } else {
263             assert(0);
264         }
265
266         // Skip the PC
267         if (curReg == MachRegister::getPC(arch)) {
268             continue;
269         }
270
271         // Skip flags
272         if (curReg.regClass() != gpr) {
273             continue;
274         }
275
276         StackAnalysis::Height curHeight = (*iter).second;
277         StackAccess* access = NULL;
278         if (getMemoryOffset(func,
279                     block,
280                     insn,
281                     addr,
282                     curReg,
283                     curHeight,
284                     access,
285                     arch)) {
286             if (curHeight.isBottom()) {
287                 stackmods_printf("\t\t\t\t INVALID: Found access based on "
288                     "register we don't understand (%s = %s)\n",
289                     curReg.name().c_str(), curHeight.format().c_str());
290                 // Once we've found a bad access, stop looking!
291                 return false;
292             } else {
293                 if (access->readHeight().height() > 20480) {
294                     stackmods_printf("\t\t\t\t Found bogus %s. Skipping.\n",
295                         access->format().c_str());
296                     continue;
297                 }
298                 accesses->insert(make_pair(curReg, access));
299             }
300         }
301     }
302
303     // Fail if any stack heights are being written out to topped locations.
304     // Since StackAnalysis tops loads from unresolved locations, we have to
305     // fail if we write out stack heights to unresolved locations. We also fail
306     // if an accessed location has a stack height base and an unknown offset.
307     if (insn->writesMemory()) {
308         std::set<InstructionAPI::Expression::Ptr> writeOperands;
309         insn->getMemoryWriteOperands(writeOperands);
310         assert(writeOperands.size() == 1);
311
312         detectToppedLoc dtl(heights);
313         (*writeOperands.begin())->apply(&dtl);
314         if (dtl.isTopped()) {
315             // We are writing to a topped location.
316             // Check if any of the registers involved in the write contain
317             // stack heights.
318             for (auto regIter = readRegs.begin(); regIter != readRegs.end();
319                 regIter++) {
320                 Absloc regLoc((*regIter)->getID());
321
322                 for (auto hIter = heights.begin(); hIter != heights.end();
323                     hIter++) {
324                     if (hIter->first == regLoc && !hIter->second.isTop()) {
325                         stackmods_printf("\t\t\t\tINVALID: Writing stack "
326                             "height to topped location\n");
327                         return false;
328                     }
329                 }
330             }
331         } else if (dtl.isBottomed()) {
332             stackmods_printf("\t\t\t\tINVALID: Writing to unknown stack "
333                 "location\n");
334             return false;
335         }
336     } else if (insn->readsMemory()) {
337         std::set<InstructionAPI::Expression::Ptr> readOperands;
338         insn->getMemoryReadOperands(readOperands);
339         for (auto rIter = readOperands.begin(); rIter != readOperands.end();
340             rIter++) {
341             detectToppedLoc dtl(heights);
342             (*rIter)->apply(&dtl);
343             if (dtl.isBottomed()) {
344                 stackmods_printf("\t\t\t\tINVALID: Reading unknown stack "
345                     "location\n");
346                 return false;
347             }
348         }
349     }
350
351     return ret;
352 }
353
354 using namespace InstructionAPI;
355
356 // Copied and modified from parseAPI/src/IA_x86Details.C
357 class zeroAllGPRegisters : public InstructionAPI::Visitor
358 {
359     public:
360         zeroAllGPRegisters(Address ip, ParseAPI::Function* f, ParseAPI::Block* b, InstructionAPI::Instruction::Ptr i, bool z = false) :
361             defined(true), m_ip(ip), func(f), block(b), insn(i), zero(z) {
362                 if (func) {
363                     StackAnalysis tmp(func);
364                     sa = tmp;
365                 }
366             }
367
368         virtual ~zeroAllGPRegisters() {}
369         bool defined;
370         std::deque<long> results;
371         Address m_ip;
372         ParseAPI::Function* func;
373         ParseAPI::Block* block;
374         InstructionAPI::Instruction::Ptr insn;
375         StackAnalysis sa;
376         bool zero;
377         long getResult() {
378             if(results.empty()) return 0;
379             return results.front();
380         }
381         bool isDefined() {
382             return defined && (results.size() == 1);
383         }
384         virtual void visit(BinaryFunction* b)
385         {
386             if(!defined) return;
387             long arg1 = results.back();
388             results.pop_back();
389             long arg2 = results.back();
390             results.pop_back();
391             if(b->isAdd())
392             {
393                 results.push_back(arg1+arg2);
394             }
395             else if(b->isMultiply())
396             {
397                 results.push_back(arg1*arg2);
398             }
399             else
400             {
401                 defined = false;
402             }
403         }
404         virtual void visit(Immediate* i)
405         {
406             if(!defined) return;
407             results.push_back(i->eval().convert<long>());
408         }
409         virtual void visit(RegisterAST* r)
410         {
411             if(!defined) return;
412             if(r->getID() == x86::eip ||
413                r->getID() == x86_64::eip ||
414                r->getID() == x86_64::rip)
415             {
416                 results.push_back(m_ip);
417                 return;
418             }
419
420             if (!zero) {
421                 if (func) {
422                     StackAnalysis::Height height = sa.find(block, m_ip, r->getID());
423                     if (!height.isBottom() && !height.isTop()) {
424                         stackmods_printf("\t\t\t found %s height = %ld\n", r->getID().name().c_str(), height.height());
425                         results.push_back(height.height());
426                         return;
427                     }
428                 }
429             }
430
431             results.push_back(0);
432         }
433         virtual void visit(Dereference* )
434         {
435             defined = false;
436         }
437
438 };
439
440 bool getMemoryOffset(ParseAPI::Function* func,
441         ParseAPI::Block* block,
442         InstructionAPI::InstructionPtr insn,
443         Address addr,
444         MachRegister reg,
445         StackAnalysis::Height height,
446         StackAccess*& ret,
447         Architecture arch)
448 {
449
450     stackmods_printf("\t\t\t getMemoryOffset for %s; checking reg %s = %s\n", insn->format().c_str(), reg.name().c_str(), height.format().c_str());
451
452     bool skipReg = false;
453     bool multChildrenInDeref = false;
454
455     InstructionAPI::RegisterAST* regAST = new InstructionAPI::RegisterAST(reg);
456     InstructionAPI::RegisterAST::Ptr regASTPtr = InstructionAPI::RegisterAST::Ptr(regAST);
457
458     std::vector<InstructionAPI::Operand> operands;
459     insn->getOperands(operands);
460
461     signed long disp = 0;
462     signed long offset = 0;
463     bool isOffsetSet = false;
464
465     // Determine how memory is accessed
466     StackAccess::StackAccessType type = StackAccess::UNKNOWN;
467     if (insn->readsMemory() && insn->writesMemory()) {
468         type = StackAccess::READWRITE;
469     } else if (insn->readsMemory()) {
470         type = StackAccess::READ;
471     } else if (insn->writesMemory()) {
472         type = StackAccess::WRITE;
473     }
474
475     // If memory is not accessed, no need to find an offset
476     if (type == StackAccess::UNKNOWN) {
477         return false;
478     }
479
480     for (unsigned i = 0; i < operands.size(); i++) {
481
482         stackmods_printf("\t\t\t\t operand[%d] = %s\n", i, operands[i].getValue()->format().c_str());
483
484         // Get the read/written register sets
485         bool match = false;
486         if (reg.isValid()) {
487             std::set<InstructionAPI::RegisterAST::Ptr> regsRead;
488             operands[i].getReadSet(regsRead);
489             for (auto iter = regsRead.begin(); iter != regsRead.end(); ++iter) {
490                 InstructionAPI::RegisterAST::Ptr cur = *iter;
491                 if (*cur == *regASTPtr) {
492                     stackmods_printf("\t\t\t\t\t reads reg\n");
493                     match = true;
494                 }
495             }
496             std::set<InstructionAPI::RegisterAST::Ptr> regsWrite;
497             operands[i].getWriteSet(regsWrite);
498             for (auto iter = regsWrite.begin(); iter != regsWrite.end(); ++iter) {
499                 InstructionAPI::RegisterAST::Ptr cur = *iter;
500                 if (*cur == *regASTPtr) {
501                     stackmods_printf("\t\t\t\t\t writes reg\n");
502                     match = true;
503                 }
504             }
505         }
506
507         // If the passed-in register is read/written...
508         if (match || !reg.isValid()) {
509             InstructionAPI::Expression::Ptr val = operands[i].getValue();
510             if (!val) break;
511
512             // Won't find an offset for an immediate
513             if (typeid(*val) == typeid(InstructionAPI::Immediate)) {
514                 continue;
515             }
516
517             // Won't find an offset for a registerAST
518             // However, we do want to record a push (e.g., callee-saved registers)
519             if (!(insn->getOperation().getID() == e_push)) {
520                 if (typeid(*val) == typeid(InstructionAPI::RegisterAST)) {
521                     continue;
522                 }
523             }
524
525             // If we have a dereference, extract the child
526             if (typeid(*val) == typeid(InstructionAPI::Dereference)) {
527                 vector<InstructionAPI::InstructionAST::Ptr> children;
528                 val->getChildren(children);
529                 if (children.size() == 1) {
530                     InstructionAPI::InstructionAST::Ptr child = children.front();
531                     val = boost::dynamic_pointer_cast<InstructionAPI::Expression>(child);
532                 }
533             }
534
535             // Copied from parseAPI/src/IA_x86Details.C IA_x86Details::findTableInsn()
536             zeroAllGPRegisters z(addr, func, block, insn, false);
537             val->apply(&z);
538             if (z.isDefined()) {
539                 zeroAllGPRegisters z2(addr, func, block, insn, true);
540                 val->apply(&z2);
541
542                 isOffsetSet = true;
543                 offset = z.getResult();
544
545                 if ( (z2.isDefined()) &&
546                      ( (z2.getResult()) ||
547                        (z.getResult() == z2.getResult()))) {
548                     disp = z2.getResult();
549                 } else {
550                     disp = 0;
551                 }
552
553                 stackmods_printf("\t\t\t\t found offset %ld, disp = %ld, type = %d\n", offset, disp, type);
554
555                 // Weird check: is there more than one child in the dereference? If not, we may want to skip
556                 vector<InstructionAPI::InstructionAST::Ptr> children2;
557                 val->getChildren(children2);
558                 if (children2.size() > 1) {
559                     stackmods_printf("\t\t\t\t\t Found >1 child in dereference.\n");
560                     multChildrenInDeref = true;
561                 }
562             }
563         }
564     }
565
566     std::stringstream msg;
567     if (isOffsetSet) {
568
569         // Question: Where did this stack height come from?
570         if (reg.isValid() &&
571                 /*!multChildrenInDeref && */
572                 (reg != MachRegister::getStackPointer(arch))) {
573
574
575             bool performSlice = true;
576             if (multChildrenInDeref && (disp==0)) {
577                 // We will never mark this as a skipReg, so avoid slicing
578                 performSlice = false;
579             }
580
581             if (multChildrenInDeref && (type==StackAccess::UNKNOWN)) {
582                 // We will never mark this as a skipReg, so avoid slicing
583                 performSlice = false;
584             }
585
586             if ((insn->getOperation().getID() == e_push)) {
587                 performSlice = false;
588             }
589
590             if (height.isBottom()) {
591                 performSlice = false;
592             }
593
594             if (performSlice) {
595                 std::vector<AbsRegion> ins;
596                 ins.push_back(AbsRegion(Absloc(reg)));
597                 Assignment::Ptr assign = Assignment::Ptr(new Assignment(insn,
598                             addr,
599                             func,
600                             block,
601                             ins,
602                             AbsRegion(Absloc(reg))));
603                 Slicer slicer(assign, block, func);
604                 Slicer::Predicates defaultPredicates;
605                 stackmods_printf("\t\t\t\t Using slicing @ 0x%lx for %s\n", addr, reg.name().c_str());
606                 Graph::Ptr graph = slicer.backwardSlice(defaultPredicates);
607
608                 stackmods_printf("\t\t\t\t\t Slicing complete, graph size = %d\n", graph->size());
609
610                 // Graph must have single entry
611                 NodeIterator exitBegin, exitEnd;
612                 graph->exitNodes(exitBegin, exitEnd);
613
614                 if (*exitBegin == NULL) {
615                     assert(0 && "Could not find exit node for backward slice");
616                 }
617
618                 std::set<Address> visited;
619
620                 // Graph must have straight-line path
621                 NodeIterator inBegin, inEnd;
622                 (*exitBegin)->ins(inBegin, inEnd);
623                 NodeIterator prev = inBegin;
624                 while ((*prev)->hasInEdges()) {
625                     NodeIterator tmp = inBegin;
626
627                     if (tmp != inEnd) {
628                         stackmods_printf("\t\t\t\t\t Next level\n");
629                         stackmods_printf("\t\t\t\t\t\t tmp: %s\n", (*tmp)->format().c_str());
630                     }
631
632                     // Check for cycles
633                     if (visited.find((*tmp)->addr()) != visited.end()) {
634                         skipReg = true;
635                         break;
636                     }
637                     visited.insert((*tmp)->addr());
638
639                     while (tmp != inEnd) {
640                         NodeIterator next = tmp;
641                         next++;
642
643                         if (next == inEnd) {
644                             break;
645                         }
646                         stackmods_printf("\t\t\t\t\t\t next: %s\n", (*next)->format().c_str());
647                         if ((*next)->DOTname().compare("<NULL>") == 0) {
648                             tmp++;
649                             continue;
650                         }
651
652                         // Check for cycles
653                         if (visited.find((*next)->addr()) != visited.end()) {
654                             skipReg = true;
655                             break;
656                         }
657                         visited.insert((*next)->addr());
658
659                         if ((*tmp)->addr() != (*next)->addr()) {
660                             skipReg = true;
661                             break;
662                         }
663                         tmp++;
664                     }
665
666                     if (skipReg) break;
667
668                     if (!(*inBegin)->hasInEdges()) {
669                         break;
670                     }
671                     prev = inBegin;
672                     (*inBegin)->ins(inBegin, inEnd);
673                 }
674
675                 stackmods_printf("\t\t\t\t Finished using backward slice to check for skipReg\n");
676
677             }
678         }
679
680         if (skipReg && multChildrenInDeref && disp==0) {
681             stackmods_printf("\t\t\t\t\t\t unset skipReg (multchildren && disp==0)\n");
682             skipReg = false;
683         }
684
685         if (skipReg && multChildrenInDeref && type==StackAccess::UNKNOWN) {
686             stackmods_printf("\t\t\t\t\t\t unset skipReg (multchildren && StackAccess::UNKNOWN\n");
687             skipReg = false;
688         }
689
690         ret = new StackAccess();
691         ret->setRegHeight(height);
692         ret->setReg(reg);
693         ret->setType(type);
694         ret->setDisp(disp);
695         ret->setSkipReg(skipReg);
696
697         if (ret->disp() && arch == Arch_x86) {
698             // Fix the signed issue
699             signed long fixedOffset;
700             Offset MAX = 0xffffffff;
701             if (disp > (long)MAX/2) {
702                 fixedOffset = -1*(MAX - disp + 1);
703                 disp = fixedOffset;
704                 ret->setDisp(disp);
705             }
706         }
707
708         ret->setReadHeight(StackAnalysis::Height(offset));
709
710         // Stackanalysis is reporting the heights at the start of the
711         // instruction, not the end; for push, we want to record the final
712         // state, which is where the value is actually stored
713         if (insn->getOperation().getID() == e_push) {
714             long width;
715             if (arch == Arch_x86) width = 4;
716             else width = 8;
717             ret->setRegHeight(ret->regHeight() - width);
718             ret->setReadHeight(ret->readHeight() - width);
719             ret->setType(StackAccess::SAVED);
720         }
721     }
722
723     return isOffsetSet;
724 }
725